From a6581f1c938098183edabbface4c9d267b91bb22 Mon Sep 17 00:00:00 2001 From: grunch Date: Sat, 25 Apr 2026 15:01:38 -0300 Subject: [PATCH 1/9] feat(bond): anti-abuse bond phase 1 taker lifecycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the opt-in taker bond into the take flow with a strict "always release" guarantee. When [anti_abuse_bond] is enabled and apply_to matches the taker side, take_buy_action / take_sell_action defer the trade hold invoice and request a Lightning hold-invoice bond from the taker first; once the bond locks, a subscriber-driven continuation resumes the original take path. Every order exit (release_action, cancel_action's cooperative + unilateral branches, admin_settle_action, admin_cancel_action, scheduler::job_cancel_orders) cancels the bond hold invoice and marks the row Released. No slashing yet — that lands in Phase 2+. Phase 1 takes the alternative path documented in spec §6.2: orders stay in Status::Pending while the bond is outstanding, the bond bolt11 ships to the taker via the existing Action::PayInvoice (the bond hash makes it unambiguous), and bond state lives entirely in the bonds table. Dedicated AddBondInvoice / WaitingTakerBond variants will land with the matching mostro-core release in a later phase. Other Phase 1 details: - A guard at take time rejects a second take with PendingOrderExists when an active bond row already exists for the order, preventing duplicate bonds when two takers race. - main.rs calls bond::resubscribe_active_bonds on startup so a daemon restart never strands a taker who paid the bond just before shutdown. - New db helpers find_bond_by_hash, find_active_bonds and find_active_bonds_for_order back the subscriber, restart hook and exit-path release respectively. - Settings::get_bond / is_bond_enabled now return None / false when MOSTRO_CONFIG is not yet initialized, so the bond gate never panics in unit tests that don't bring up the full configuration. - Default-off: nodes that don't set [anti_abuse_bond].enabled = true see zero behavior change. Tests: cargo test (245 passed), cargo clippy --all-targets --all-features (clean). --- docs/ANTI_ABUSE_BOND.md | 48 +++- src/app/admin_cancel.rs | 11 + src/app/admin_settle.rs | 11 + src/app/bond/db.rs | 99 +++++++ src/app/bond/flow.rs | 569 ++++++++++++++++++++++++++++++++++++++++ src/app/bond/mod.rs | 5 + src/app/cancel.rs | 41 +++ src/app/release.rs | 9 + src/app/take_buy.rs | 40 +++ src/app/take_sell.rs | 36 +++ src/config/settings.rs | 15 +- src/main.rs | 8 + src/scheduler.rs | 14 + 13 files changed, 886 insertions(+), 20 deletions(-) create mode 100644 src/app/bond/flow.rs diff --git a/docs/ANTI_ABUSE_BOND.md b/docs/ANTI_ABUSE_BOND.md index 773b50fa..dad4aaef 100644 --- a/docs/ANTI_ABUSE_BOND.md +++ b/docs/ANTI_ABUSE_BOND.md @@ -79,24 +79,24 @@ The issue proposes three phases. We split them further so each PR is small enough to review without a marathon session. Data-model and payout plumbing come early (Phase 0 & 3) and are reused by every subsequent slash path. -| Phase | PR scope | Depends on | -|------:|----------|------------| -| 0 | Foundation: config schema, `bonds` table, pure helpers, types | — | -| 1 | Taker bond lifecycle: **lock + always release** (no slashing yet) | 0 | -| 2 | Taker bond: slash on **lost dispute** | 1 | -| 3 | Payout flow: `add-invoice` to winner, routing-fee estimation, retries, audit event | 2 | -| 4 | Taker bond: slash on **timeout** (apply_to=take, slash_on_waiting_timeout) | 3 | -| 5 | Maker bond (non-range): lock + dispute slash reusing phase 3 payout | 3 | -| 6 | Maker bond for **range orders** with proportional slashes | 5 | -| 7 | Maker bond: slash on **timeout** | 5 | -| 8 | Public config exposure (Mostro info event) + operator docs polish | 7 | +| Phase | PR scope | Depends on | Status | +|------:|----------|------------|--------| +| 0 | Foundation: config schema, `bonds` table, pure helpers, types | — | ✅ shipped (PR #712) | +| 1 | Taker bond lifecycle: **lock + always release** (no slashing yet) | 0 | ✅ shipped | +| 2 | Taker bond: slash on **lost dispute** | 1 | pending | +| 3 | Payout flow: `add-invoice` to winner, routing-fee estimation, retries, audit event | 2 | pending | +| 4 | Taker bond: slash on **timeout** (apply_to=take, slash_on_waiting_timeout) | 3 | pending | +| 5 | Maker bond (non-range): lock + dispute slash reusing phase 3 payout | 3 | pending | +| 6 | Maker bond for **range orders** with proportional slashes | 5 | pending | +| 7 | Maker bond: slash on **timeout** | 5 | pending | +| 8 | Public config exposure (Mostro info event) + operator docs polish | 7 | pending | Phases 4, 5, 6, 7 can partially overlap in time but must land in this order on `main` to keep review scope honest. --- -## 5. Phase 0 — Foundation +## 5. Phase 0 — Foundation ✅ Completed Purely additive. Touches no trade flow. @@ -183,12 +183,34 @@ Purely additive. Touches no trade flow. --- -## 6. Phase 1 — Taker bond: lock + always release +## 6. Phase 1 — Taker bond: lock + always release ✅ Completed Wire the bond into the take flow but **never slash**. This lets operators turn the feature on in staging to exercise hold-invoice custody with zero risk to users. +**Implementation notes (as shipped):** + +- The phase took the §6.2 "Alternative" path: orders stay in `Status::Pending` + while the bond is outstanding, and the bond bolt11 is delivered to the + taker via the existing `Action::PayInvoice` (the bond's payment hash + uniquely distinguishes it from the trade hold invoice that follows). The + dedicated `Status::WaitingTakerBond` / `Action::AddBondInvoice` will be + introduced in the matching `mostro-core` release alongside a later + phase, at which point this can be migrated transparently. +- Bond release is wired into every Phase 1 exit: + `release_action`, `cancel_action` (cooperative + unilateral, taker- and + maker-side, including pending-order maker cancels), `admin_settle_action`, + `admin_cancel_action`, and `scheduler::job_cancel_orders`. Slashing + hooks are intentionally absent and land in Phase 2+. +- A guard in `take_buy_action` / `take_sell_action` rejects a take with + `PendingOrderExists` when an active bond row already exists for the + order, preventing duplicate bonds when two takers race. +- On daemon startup, `bond::resubscribe_active_bonds` re-attaches LND + invoice subscribers for any bond rows still in `Requested` / `Locked`, + so a restart never strands a taker who paid the bond just before the + daemon went down. + ### 6.1 Scope - Gate `enabled && apply_to ∈ { "take", "both" }`. Otherwise existing code diff --git a/src/app/admin_cancel.rs b/src/app/admin_cancel.rs index 24f58437..bbdc1ee1 100644 --- a/src/app/admin_cancel.rs +++ b/src/app/admin_cancel.rs @@ -1,6 +1,7 @@ use std::borrow::Cow; use std::str::FromStr; +use crate::app::bond; use crate::app::context::AppContext; use crate::db::{ find_dispute_by_order_id, is_assigned_solver, is_dispute_taken_by_admin, @@ -199,5 +200,15 @@ pub async fn admin_cancel_action( .await .map_err(|e| MostroInternalErr(ServiceError::NostrError(e.to_string())))?; + // Phase 1: admin cancellation always releases any taker bond. The + // dispute slash path lands in Phase 2. + if let Err(e) = bond::release_bonds_for_order(pool, order.id).await { + tracing::warn!( + "admin_cancel: bond release failed for {}: {}", + order.id, + e + ); + } + Ok(()) } diff --git a/src/app/admin_settle.rs b/src/app/admin_settle.rs index 39d8de6c..70dfaa41 100644 --- a/src/app/admin_settle.rs +++ b/src/app/admin_settle.rs @@ -1,3 +1,4 @@ +use crate::app::bond; use crate::app::context::AppContext; use crate::db::{ find_dispute_by_order_id, is_assigned_solver, is_dispute_taken_by_admin, @@ -188,6 +189,16 @@ pub async fn admin_settle_action( ) .await; } + // Phase 1: admin-settled disputes always release any taker bond. + // Slashing on lost dispute lands in Phase 2. + if let Err(e) = bond::release_bonds_for_order(pool, order_updated.id).await { + tracing::warn!( + "admin_settle: bond release failed for {}: {}", + order_updated.id, + e + ); + } + let _ = do_payment(ctx, order_updated, request_id).await; Ok(()) diff --git a/src/app/bond/db.rs b/src/app/bond/db.rs index b9caa602..c4196b2d 100644 --- a/src/app/bond/db.rs +++ b/src/app/bond/db.rs @@ -61,6 +61,54 @@ pub async fn find_bonds_by_state( .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string()))) } +/// Look up a bond row by its Lightning payment hash. The hash uniquely +/// identifies the bond hold invoice, so this is what the LND subscriber +/// uses to correlate incoming invoice events back to a `Bond`. +pub async fn find_bond_by_hash( + pool: &Pool, + hash: &str, +) -> Result, mostro_core::error::MostroError> { + sqlx::query_as::<_, Bond>("SELECT * FROM bonds WHERE hash = ? LIMIT 1") + .bind(hash) + .fetch_optional(pool) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string()))) +} + +/// List every bond row that still has an outstanding LND HTLC, i.e. is in +/// `Requested` or `Locked`. Used on daemon startup to resubscribe to +/// in-flight bond hold invoices, and as the Phase 1 workhorse for the +/// "always release" exits — we filter further on `order_id` in +/// [`find_active_bonds_for_order`]. +pub async fn find_active_bonds( + pool: &Pool, +) -> Result, mostro_core::error::MostroError> { + sqlx::query_as::<_, Bond>( + "SELECT * FROM bonds WHERE state IN ('requested', 'locked') ORDER BY created_at ASC", + ) + .fetch_all(pool) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string()))) +} + +/// List the still-outstanding bonds attached to a single order. Phase 1 +/// uses this to release every bond on any order exit path (cancel, +/// release, admin actions, scheduler timeouts). +pub async fn find_active_bonds_for_order( + pool: &Pool, + order_id: Uuid, +) -> Result, mostro_core::error::MostroError> { + sqlx::query_as::<_, Bond>( + "SELECT * FROM bonds \ + WHERE order_id = ? AND state IN ('requested', 'locked') \ + ORDER BY created_at ASC", + ) + .bind(order_id) + .fetch_all(pool) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string()))) +} + /// Update a bond row by primary key. Returns the persisted `Bond`. pub async fn update_bond( pool: &Pool, @@ -176,6 +224,57 @@ mod tests { assert!(res.is_none()); } + #[tokio::test] + async fn find_by_hash_returns_match() { + let pool = setup_pool().await; + let order_id = Uuid::new_v4(); + insert_parent_order(&pool, order_id).await; + + let mut bond = dummy_bond(order_id, BondRole::Taker); + bond.hash = Some("c".repeat(64)); + let created = create_bond(&pool, bond).await.expect("insert"); + + let found = find_bond_by_hash(&pool, &"c".repeat(64)) + .await + .expect("query") + .expect("row present"); + assert_eq!(found.id, created.id); + + let missing = find_bond_by_hash(&pool, &"f".repeat(64)) + .await + .expect("query"); + assert!(missing.is_none()); + } + + #[tokio::test] + async fn active_bonds_filter_terminal_states() { + let pool = setup_pool().await; + let order_a = Uuid::new_v4(); + let order_b = Uuid::new_v4(); + insert_parent_order(&pool, order_a).await; + insert_parent_order(&pool, order_b).await; + let bond_a = create_bond(&pool, dummy_bond(order_a, BondRole::Taker)) + .await + .unwrap(); + let bond_b = create_bond(&pool, dummy_bond(order_b, BondRole::Taker)) + .await + .unwrap(); + + // Bond B → Released (terminal): must drop out of active set. + let mut released = bond_b.clone(); + released.state = BondState::Released.to_string(); + update_bond(&pool, released).await.unwrap(); + + let active = find_active_bonds(&pool).await.unwrap(); + assert_eq!(active.len(), 1); + assert_eq!(active[0].id, bond_a.id); + + let active_a = find_active_bonds_for_order(&pool, order_a).await.unwrap(); + assert_eq!(active_a.len(), 1); + let active_b = find_active_bonds_for_order(&pool, order_b).await.unwrap(); + assert!(active_b.is_empty()); + } + #[tokio::test] async fn find_by_state_filters() { let pool = setup_pool().await; diff --git a/src/app/bond/flow.rs b/src/app/bond/flow.rs new file mode 100644 index 00000000..63fdf80b --- /dev/null +++ b/src/app/bond/flow.rs @@ -0,0 +1,569 @@ +//! Bond lifecycle wiring (Phase 1). +//! +//! Phase 1 adds a single guarantee: when the feature is enabled and the +//! taker side is in scope (`apply_to ∈ {take, both}`), a taker is asked to +//! lock a Lightning hold invoice as a bond before the trade flow starts; +//! and on **every** exit — happy path, unilateral cancel, cooperative +//! cancel, admin action, scheduler timeout — the bond is **released**. +//! +//! Slashing is intentionally absent: it lands in Phase 2+. This means +//! operators can flip `enabled = true` in staging and exercise hold-invoice +//! custody end-to-end without any user funds at risk if Mostro mis-judges +//! the situation. +//! +//! Protocol note: `mostro-core` 0.10.0 does not yet expose +//! `Action::AddBondInvoice` / `Status::WaitingTakerBond`. Phase 1 takes the +//! "Alternative" path documented in §6.2 of `docs/ANTI_ABUSE_BOND.md`: +//! orders stay in `Status::Pending` while waiting for the bond, and the +//! bond bolt11 ships to the taker as a regular `Action::PayInvoice` (the +//! semantics — "pay this Lightning invoice" — are an exact match). Bond +//! state lives entirely in the `bonds` table; clients identify the +//! invoice as a bond by its hash, which differs from the trade hold +//! invoice that follows once the bond is locked. The dedicated action / +//! status will land alongside the corresponding `mostro-core` release in a +//! later phase. + +use std::sync::Arc; + +use chrono::Utc; +use fedimint_tonic_lnd::lnrpc::invoice::InvoiceState; +use mostro_core::error::{MostroError, MostroError::MostroInternalErr, ServiceError}; +use mostro_core::prelude::*; +use nostr_sdk::nostr::hashes::hex::FromHex; +use nostr_sdk::prelude::*; +use sqlx::{Pool, Sqlite}; +use sqlx_crud::Crud; +use tokio::sync::mpsc::channel; +use tracing::{info, warn}; +use uuid::Uuid; + +use crate::config::settings::Settings; +use crate::lightning::{InvoiceMessage, LndConnector}; +use crate::util::{ + bytes_to_string, enqueue_order_msg, get_keys, set_waiting_invoice_status, show_hold_invoice, +}; + +use super::db::{ + create_bond, find_active_bonds, find_active_bonds_for_order, find_bond_by_hash, +}; +use super::math::compute_bond_amount; +use super::model::Bond; +use super::types::{BondRole, BondState}; + +/// True when the configuration requires the **taker** to post a bond. +/// +/// This is the single Phase 1 gate. Every bond touchpoint in the take +/// flow asks this question first, so a misconfigured node (no +/// `[anti_abuse_bond]` block at all) behaves exactly like before. +pub fn taker_bond_required() -> bool { + Settings::get_bond() + .filter(|cfg| cfg.enabled) + .is_some_and(|cfg| cfg.apply_to.applies_to_taker()) +} + +/// Create a hold invoice for the taker's bond, persist a `Bond` row in +/// `Requested`, ship the bolt11 to the taker, and start the LND +/// subscriber that flips the row to `Locked` once the taker pays. +/// +/// On any failure inside this function the bond row may exist in +/// `Requested` with no LND counterpart — that's fine: Phase 1's +/// "always release" guarantee covers it on the next exit. +pub async fn request_taker_bond( + pool: &Pool, + order: &Order, + taker_pubkey: PublicKey, + request_id: Option, + trade_index: Option, +) -> Result { + let cfg = Settings::get_bond().ok_or_else(|| { + MostroInternalErr(ServiceError::UnexpectedError( + "anti_abuse_bond block is missing while bond was deemed required".into(), + )) + })?; + + let amount = compute_bond_amount(order.amount, cfg); + let memo = format!("Bond for Mostro order {}", order.id); + + let mut ln_client = LndConnector::new().await?; + let (invoice_resp, preimage, hash) = ln_client + .create_hold_invoice(&memo, amount) + .await + .map_err(|e| MostroInternalErr(ServiceError::HoldInvoiceError(e.to_string())))?; + + let mut bond = Bond::new_requested( + order.id, + taker_pubkey.to_string(), + BondRole::Taker, + amount, + ); + bond.hash = Some(bytes_to_string(&hash)); + bond.preimage = Some(bytes_to_string(&preimage)); + bond.payment_request = Some(invoice_resp.payment_request.clone()); + + let bond = create_bond(pool, bond).await?; + + info!( + "Bond requested: bond_id={} order_id={} role={} amount_sats={}", + bond.id, order.id, bond.role, bond.amount_sats + ); + + // Phase-1 alternative path (see module-level doc): the bond bolt11 + // ships as a regular `PayInvoice`. The `SmallOrder` echoes the order + // id so a bond-aware client can correlate — and a non-bond-aware + // client just sees an extra invoice to pay before the trade. + let order_kind = order.get_order_kind().map_err(MostroInternalErr)?; + let bond_small = SmallOrder::new( + Some(order.id), + Some(order_kind), + Some(Status::Pending), + amount, + order.fiat_code.clone(), + order.min_amount, + order.max_amount, + order.fiat_amount, + order.payment_method.clone(), + order.premium, + None, + None, + None, + None, + None, + ); + + enqueue_order_msg( + request_id, + Some(order.id), + Action::PayInvoice, + Some(Payload::PaymentRequest( + Some(bond_small), + invoice_resp.payment_request, + None, + )), + taker_pubkey, + trade_index, + ) + .await; + + bond_invoice_subscribe(hash, request_id).await?; + + Ok(bond) +} + +/// Release a single bond: cancel the hold invoice (best-effort) and +/// transition the row to `Released`. +/// +/// **Idempotent.** A bond already in a terminal state (`Released`, +/// `Slashed`, `Failed`) is a no-op. This matters because Phase 1 wires +/// release into every exit, and the same bond can plausibly be hit by +/// more than one path (e.g. cooperative cancel after the LND subscriber +/// already saw `Canceled`). +pub async fn release_bond(pool: &Pool, bond: &Bond) -> Result<(), MostroError> { + if matches!( + bond.state.as_str(), + "released" | "slashed" | "failed" + ) { + return Ok(()); + } + + if let Some(hash) = bond.hash.as_ref() { + match LndConnector::new().await { + Ok(mut ln) => { + if let Err(e) = ln.cancel_hold_invoice(hash).await { + // Hold invoice already canceled / unknown to LND is the + // common race with the subscriber; we still want the row + // to land in `Released` so callers can move on. + warn!( + "Bond {} cancel_hold_invoice failed: {} — marking Released anyway", + bond.id, e + ); + } + } + Err(e) => { + warn!( + "Bond {} could not connect to LND for cancel: {} — marking Released anyway", + bond.id, e + ); + } + } + } + + let mut updated = bond.clone(); + updated.state = BondState::Released.to_string(); + updated.released_at = Some(Utc::now().timestamp()); + let id = updated.id; + let order_id = updated.order_id; + updated + .update(pool) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))?; + + info!( + "Bond {} released for order {} (was state={})", + id, order_id, bond.state + ); + Ok(()) +} + +/// Release every active (`Requested` or `Locked`) bond attached to an +/// order. Designed to be the **single** call sites use from each exit +/// path — the gate, the lookup, and the per-row release are all here. +/// +/// Returns `Ok(())` when the feature is disabled or no active bonds +/// exist; never fails the caller for individual bond failures (those +/// are logged and the loop continues). +pub async fn release_bonds_for_order( + pool: &Pool, + order_id: Uuid, +) -> Result<(), MostroError> { + if !Settings::is_bond_enabled() { + return Ok(()); + } + + let bonds = find_active_bonds_for_order(pool, order_id).await?; + for bond in bonds.iter() { + if let Err(e) = release_bond(pool, bond).await { + warn!("Failed to release bond {}: {}", bond.id, e); + } + } + Ok(()) +} + +/// Spawn the LND subscriber for a bond hold invoice. The subscriber +/// transitions the bond row through `Locked` / `Released` based on the +/// invoice state and, on `Locked`, resumes the original take flow. +/// +/// Mirrors the structure of `crate::util::invoice_subscribe` so restart +/// resilience can later reuse the same shape. +pub async fn bond_invoice_subscribe( + hash: Vec, + request_id: Option, +) -> Result<(), MostroError> { + let mut ln_client = LndConnector::new().await?; + let (tx, mut rx) = channel::(100); + + tokio::spawn(async move { + if let Err(e) = ln_client.subscribe_invoice(hash, tx).await { + warn!("Bond invoice subscriber ended with error: {e}"); + } + }); + + let pool = crate::config::settings::get_db_pool(); + + tokio::spawn(async move { + while let Some(msg) = rx.recv().await { + let hash_hex = bytes_to_string(msg.hash.as_ref()); + match msg.state { + InvoiceState::Accepted => { + if let Err(e) = on_bond_invoice_accepted(&hash_hex, &pool, request_id).await { + warn!("Bond invoice accepted handler error: {e}"); + } + } + InvoiceState::Canceled => { + if let Err(e) = on_bond_invoice_canceled(&hash_hex, &pool).await { + warn!("Bond invoice canceled handler error: {e}"); + } + } + InvoiceState::Settled => { + info!("Bond hash {hash_hex}: invoice settled"); + } + InvoiceState::Open => { + info!("Bond hash {hash_hex}: invoice open (waiting for payment)"); + } + } + } + }); + + Ok(()) +} + +/// Restart hook: re-subscribe to every bond that was still active when +/// the daemon stopped. Called from `main` next to `find_held_invoices`. +pub async fn resubscribe_active_bonds(pool: &Arc>) -> Result<(), MostroError> { + if !Settings::is_bond_enabled() { + return Ok(()); + } + let bonds = find_active_bonds(pool.as_ref()).await?; + for bond in bonds.into_iter() { + if let Some(hash) = bond.hash.as_ref() { + // Hex string back to bytes for LND. + match Vec::::from_hex(hash) { + Ok(bytes) => { + if let Err(e) = bond_invoice_subscribe(bytes, None).await { + warn!("Failed to resubscribe bond {}: {}", bond.id, e); + } else { + info!("Resubscribed bond {} (state={})", bond.id, bond.state); + } + } + Err(e) => warn!("Bond {} has malformed hash: {}", bond.id, e), + } + } + } + Ok(()) +} + +/// Subscriber callback for `InvoiceState::Accepted`: bond is locked. +/// +/// Transitions the row to `Locked` and resumes the original take flow +/// (creates the trade hold invoice / asks the buyer for a payout +/// invoice, depending on the side). +async fn on_bond_invoice_accepted( + hash: &str, + pool: &Pool, + request_id: Option, +) -> Result<(), MostroError> { + let mut bond = match find_bond_by_hash(pool, hash).await? { + Some(b) => b, + None => { + warn!("Bond invoice accepted for unknown hash {hash}"); + return Ok(()); + } + }; + + if bond.state == BondState::Locked.to_string() { + // Subscriber may emit Accepted more than once on reconnect; idempotent. + return Ok(()); + } + if bond.state != BondState::Requested.to_string() { + warn!( + "Bond {} accepted but state was {} — ignoring", + bond.id, bond.state + ); + return Ok(()); + } + + bond.state = BondState::Locked.to_string(); + bond.locked_at = Some(Utc::now().timestamp()); + let bond = bond + .update(pool) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))?; + + info!("Bond {} locked for order {}", bond.id, bond.order_id); + + let order = Order::by_id(pool, bond.order_id) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))? + .ok_or_else(|| { + MostroInternalErr(ServiceError::UnexpectedError(format!( + "Bond {} references missing order {}", + bond.id, bond.order_id + ))) + })?; + + let my_keys = get_keys()?; + resume_take_after_bond(pool, order, &my_keys, request_id).await +} + +/// Subscriber callback for `InvoiceState::Canceled`: bond never locked +/// (taker abandoned the invoice, or LND auto-canceled on expiration). +/// +/// Phase 1 keeps the order untouched: it stays `Pending` with the taker +/// fields populated. The maker's order remains discoverable via the +/// existing Nostr event. A follow-up phase (or operator action) can +/// reset the order if needed; for Phase 1, "always release" is the only +/// guarantee we owe. +async fn on_bond_invoice_canceled(hash: &str, pool: &Pool) -> Result<(), MostroError> { + let bond = match find_bond_by_hash(pool, hash).await? { + Some(b) => b, + None => return Ok(()), + }; + + if matches!(bond.state.as_str(), "released" | "slashed" | "failed") { + return Ok(()); + } + + let mut updated = bond.clone(); + updated.state = BondState::Released.to_string(); + updated.released_at = Some(Utc::now().timestamp()); + updated + .update(pool) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))?; + + info!( + "Bond {} marked Released after LND cancel (order {})", + bond.id, bond.order_id + ); + Ok(()) +} + +/// Resume the take flow after the bond locks. +/// +/// The taker side already populated all trade fields on the order before +/// requesting the bond, so this function only needs to drive the trade +/// hold invoice / payout-invoice request that `take_*_action` deferred. +async fn resume_take_after_bond( + pool: &Pool, + mut order: Order, + my_keys: &Keys, + request_id: Option, +) -> Result<(), MostroError> { + let kind = order.get_order_kind().map_err(MostroInternalErr)?; + let buyer_pubkey = order.get_buyer_pubkey().map_err(MostroInternalErr)?; + let seller_pubkey = order.get_seller_pubkey().map_err(MostroInternalErr)?; + + match kind { + // Buy order → taker = seller, no buyer-invoice required up front: + // mirror the post-take path in take_buy_action. + mostro_core::order::Kind::Buy => { + show_hold_invoice( + my_keys, + None, + &buyer_pubkey, + &seller_pubkey, + order, + request_id, + ) + .await + } + // Sell order → taker = buyer. If the buyer included an invoice in + // the take message we already persisted it on `order.buyer_invoice`; + // otherwise we ask for one. This mirrors take_sell_action. + mostro_core::order::Kind::Sell => { + if order.buyer_invoice.is_some() { + let payment_request = order.buyer_invoice.clone(); + show_hold_invoice( + my_keys, + payment_request, + &buyer_pubkey, + &seller_pubkey, + order, + request_id, + ) + .await + } else { + set_waiting_invoice_status(&mut order, buyer_pubkey, request_id) + .await + .map_err(|_| MostroInternalErr(ServiceError::UpdateOrderStatusError))?; + let order_updated = + crate::util::update_order_event(my_keys, Status::WaitingBuyerInvoice, &order) + .await + .map_err(|e| MostroInternalErr(ServiceError::NostrError(e.to_string())))?; + order_updated + .update(pool) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))?; + Ok(()) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::app::bond::types::BondRole; + use sqlx::sqlite::SqlitePoolOptions; + + async fn setup_pool() -> Pool { + let pool = SqlitePoolOptions::new() + .max_connections(1) + .connect(":memory:") + .await + .expect("open in-memory sqlite"); + sqlx::query(include_str!( + "../../../migrations/20221222153301_orders.sql" + )) + .execute(&pool) + .await + .expect("orders migration"); + sqlx::query(include_str!( + "../../../migrations/20260423120000_anti_abuse_bond.sql" + )) + .execute(&pool) + .await + .expect("bonds migration"); + pool + } + + async fn insert_order(pool: &Pool, id: Uuid) { + sqlx::query( + r#"INSERT INTO orders ( + id, kind, event_id, status, premium, payment_method, + amount, fiat_code, fiat_amount, created_at, expires_at + ) VALUES (?, 'buy', ?, 'pending', 0, 'ln', 1000, 'USD', 10, 0, 0)"#, + ) + .bind(id) + .bind(id.simple().to_string()) + .execute(pool) + .await + .expect("insert order"); + } + + fn make_bond(order_id: Uuid, state: BondState) -> Bond { + let mut b = Bond::new_requested(order_id, "a".repeat(64), BondRole::Taker, 1_500); + b.state = state.to_string(); + b.hash = Some("c".repeat(64)); + b + } + + #[tokio::test] + async fn release_bond_is_idempotent_for_terminal_states() { + let pool = setup_pool().await; + let order_id = Uuid::new_v4(); + insert_order(&pool, order_id).await; + let bond = create_bond(&pool, make_bond(order_id, BondState::Released)) + .await + .unwrap(); + + // No LND, no panic: idempotent on terminal states. + release_bond(&pool, &bond).await.unwrap(); + + let after = find_bond_by_hash(&pool, &"c".repeat(64)) + .await + .unwrap() + .unwrap(); + assert_eq!(after.state, "released"); + assert_eq!(after.released_at, bond.released_at); + } + + #[tokio::test] + async fn release_bonds_for_order_no_op_when_disabled() { + // No `[anti_abuse_bond]` block in test settings → feature off. + // Function must succeed without touching LND or DB beyond a + // configuration check. + let pool = setup_pool().await; + // Even with active bonds in the DB, the gate keeps us out. + let order_id = Uuid::new_v4(); + insert_order(&pool, order_id).await; + let _ = create_bond(&pool, make_bond(order_id, BondState::Locked)) + .await + .unwrap(); + + // Settings::is_bond_enabled() reads MOSTRO_CONFIG which is unset + // in the unit-test harness → returns false. Verify the call path + // is a clean no-op. + release_bonds_for_order(&pool, order_id).await.unwrap(); + + // Bond untouched. + let active = find_active_bonds_for_order(&pool, order_id).await.unwrap(); + assert_eq!(active.len(), 1); + } + + #[tokio::test] + async fn release_bond_without_hash_marks_released() { + // A `Requested` bond with no hash yet (e.g. failure between + // `new_requested` and `create_hold_invoice`) must still be + // releasable: the row transitions to `Released` and no LND call + // is attempted. + let pool = setup_pool().await; + let order_id = Uuid::new_v4(); + insert_order(&pool, order_id).await; + let mut bond = make_bond(order_id, BondState::Requested); + bond.hash = None; + let bond = create_bond(&pool, bond).await.unwrap(); + + release_bond(&pool, &bond).await.unwrap(); + + let active = find_active_bonds_for_order(&pool, order_id).await.unwrap(); + assert!(active.is_empty(), "bond should no longer be active"); + } + + #[test] + fn taker_bond_required_is_false_without_config() { + // No global config initialized in unit tests → gate must be off. + // Guarantees that all bond touchpoints are inert in the absence + // of an `[anti_abuse_bond]` block. + assert!(!taker_bond_required()); + } +} diff --git a/src/app/bond/mod.rs b/src/app/bond/mod.rs index 54aa1ed8..5f256cba 100644 --- a/src/app/bond/mod.rs +++ b/src/app/bond/mod.rs @@ -10,10 +10,15 @@ //! Callers must gate on that flag. pub mod db; +pub mod flow; pub mod math; pub mod model; pub mod types; +pub use flow::{ + release_bond, release_bonds_for_order, request_taker_bond, resubscribe_active_bonds, + taker_bond_required, +}; pub use math::compute_bond_amount; pub use model::Bond; pub use types::{BondRole, BondSlashReason, BondState}; diff --git a/src/app/cancel.rs b/src/app/cancel.rs index 1aaecf52..eedbbced 100644 --- a/src/app/cancel.rs +++ b/src/app/cancel.rs @@ -1,3 +1,4 @@ +use crate::app::bond; use crate::app::context::AppContext; use crate::app::dispute::close_dispute_after_user_resolution; use crate::db::{edit_pubkeys_order, update_order_to_initial_state}; @@ -145,6 +146,16 @@ async fn cancel_cooperative_execution_step_2( ) .await; + // Phase 1: cooperative cancel always releases any taker bond. The + // dispute slash path lands in Phase 2. + if let Err(e) = bond::release_bonds_for_order(pool, order.id).await { + tracing::warn!( + "cooperative_cancel: bond release failed for {}: {}", + order.id, + e + ); + } + Ok(()) } @@ -249,6 +260,16 @@ async fn cancel_order_by_taker( // Notify the creator about the republished order after the taker-side cancellation flow completes notify_creator(&order_updated, request_id).await?; + // Phase 1: the taker cancelled before activating the trade — always + // release the bond. Slashing for timeout-based cancels is Phase 4. + if let Err(e) = bond::release_bonds_for_order(pool, order_updated.id).await { + tracing::warn!( + "taker_cancel: bond release failed for {}: {}", + order_updated.id, + e + ); + } + Ok(()) } @@ -300,6 +321,16 @@ async fn cancel_order_by_maker( ) .await; + // Phase 1: maker cancelled before the trade went active — release any + // taker bond that had already been locked. + if let Err(e) = bond::release_bonds_for_order(pool, order.id).await { + tracing::warn!( + "maker_cancel: bond release failed for {}: {}", + order.id, + e + ); + } + Ok(()) } @@ -342,6 +373,16 @@ async fn cancel_pending_order_from_maker( None, ) .await; + // Phase 1: a maker cancelling a still-Pending order may be racing + // with a taker who just locked a bond. Release any active bond so + // the taker is made whole. + if let Err(e) = bond::release_bonds_for_order(pool, order.id).await { + tracing::warn!( + "pending_maker_cancel: bond release failed for {}: {}", + order.id, + e + ); + } Ok(()) } diff --git a/src/app/release.rs b/src/app/release.rs index 7be71d03..90490d80 100644 --- a/src/app/release.rs +++ b/src/app/release.rs @@ -1,3 +1,4 @@ +use crate::app::bond; use crate::app::context::AppContext; use crate::app::dispute::close_dispute_after_user_resolution; use crate::lightning::LndConnector; @@ -268,6 +269,14 @@ pub async fn release_action( ) .await; + // Phase 1: release any taker bond attached to this order before we + // hand off to the buyer payment task. Slashing is intentionally not + // wired in yet — that's Phase 2+. A failed bond release is logged but + // does not block trade finalization. + if let Err(e) = bond::release_bonds_for_order(pool, order.id).await { + tracing::warn!("release_action: bond release failed for {}: {}", order.id, e); + } + // Finally we try to pay buyer's invoice let _ = do_payment(ctx, order, request_id).await; diff --git a/src/app/take_buy.rs b/src/app/take_buy.rs index c2f2edac..970bb966 100644 --- a/src/app/take_buy.rs +++ b/src/app/take_buy.rs @@ -1,3 +1,5 @@ +use crate::app::bond; +use crate::app::bond::db::find_active_bonds_for_order; use crate::app::context::AppContext; use crate::util::{ get_dev_fee, get_fiat_amount_requested, get_market_amount_and_fee, get_order, show_hold_invoice, @@ -6,6 +8,7 @@ use crate::util::{ use crate::db::{seller_has_pending_order, update_user_trade_index}; use mostro_core::prelude::*; use nostr_sdk::prelude::*; +use sqlx_crud::Crud; pub async fn take_buy_action( ctx: &AppContext, @@ -92,6 +95,43 @@ pub async fn take_buy_action( .await .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))?; + // Anti-abuse bond (Phase 1): if the operator opted into a taker bond, + // intercept the take here. We persist the partially-populated order + // (status stays `Pending`) and request the bond. The trade hold + // invoice is created later — once the bond locks — by the bond + // subscriber's continuation in `bond::flow::resume_take_after_bond`. + if bond::taker_bond_required() { + // Defend against concurrent takes for the same order: if another + // taker already has an active bond on this order, the second take + // must back off rather than create a duplicate bond row. + let existing = find_active_bonds_for_order(pool, order.id) + .await + .map_err(|_| MostroCantDo(CantDoReason::PendingOrderExists))?; + if !existing.is_empty() { + return Err(MostroCantDo(CantDoReason::PendingOrderExists)); + } + + // Stash the seller (taker) trade pubkey so the post-bond + // continuation can resume `show_hold_invoice` with the same + // arguments the legacy path would have used. + order.seller_pubkey = Some(seller_pubkey.to_string()); + + let persisted = order + .update(pool) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))?; + + bond::request_taker_bond( + pool, + &persisted, + seller_pubkey, + request_id, + Some(trade_index), + ) + .await?; + return Ok(()); + } + // Show hold invoice and return success or error if let Err(cause) = show_hold_invoice( my_keys, diff --git a/src/app/take_sell.rs b/src/app/take_sell.rs index f09a4ecb..2475eae9 100644 --- a/src/app/take_sell.rs +++ b/src/app/take_sell.rs @@ -1,3 +1,5 @@ +use crate::app::bond; +use crate::app::bond::db::find_active_bonds_for_order; use crate::app::context::AppContext; use crate::db::{buyer_has_pending_order, update_user_trade_index}; use crate::util::{ @@ -122,6 +124,40 @@ pub async fn take_sell_action( .await .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))?; + // Anti-abuse bond (Phase 1): when enabled for the taker side, defer + // the trade hold-invoice / `WaitingBuyerInvoice` step. We persist the + // populated order (status stays `Pending`), stash the buyer payout + // invoice if the taker provided one, and request the taker's bond. + // `bond::flow::resume_take_after_bond` resumes the trade once the + // bond locks. + if bond::taker_bond_required() { + let existing = find_active_bonds_for_order(pool, order.id) + .await + .map_err(|_| MostroCantDo(CantDoReason::PendingOrderExists))?; + if !existing.is_empty() { + return Err(MostroCantDo(CantDoReason::PendingOrderExists)); + } + + if let Some(invoice) = payment_request.as_ref() { + order.buyer_invoice = Some(invoice.clone()); + } + + let persisted = order + .update(pool) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))?; + + bond::request_taker_bond( + pool, + &persisted, + event.sender, + request_id, + Some(trade_index), + ) + .await?; + return Ok(()); + } + // If payment request is not present, update order status to waiting buyer invoice if payment_request.is_none() { update_order_status(&mut order, my_keys, pool, request_id).await?; diff --git a/src/config/settings.rs b/src/config/settings.rs index 6e96a761..ad8495ea 100644 --- a/src/config/settings.rs +++ b/src/config/settings.rs @@ -84,18 +84,19 @@ impl Settings { /// This function retrieves the anti-abuse bond configuration from the /// global `MOSTRO_CONFIG`. Returns `None` when the `[anti_abuse_bond]` - /// block is absent (treated as disabled). + /// block is absent (treated as disabled), and also when the global + /// settings haven't been initialized yet — unlike the other accessors + /// in this file, the bond gate is on the hot path of the take flow and + /// must never panic in unit tests that don't bring up the full + /// configuration. pub fn get_bond() -> Option<&'static AntiAbuseBondSettings> { - MOSTRO_CONFIG - .get() - .expect("No settings found") - .anti_abuse_bond - .as_ref() + MOSTRO_CONFIG.get()?.anti_abuse_bond.as_ref() } /// True when the feature is configured AND explicitly enabled. This is /// the single gate every bond-related code path must check before - /// running. Keeps the opt-in guarantee simple to audit. + /// running. Keeps the opt-in guarantee simple to audit. Returns + /// `false` when settings haven't been initialized. pub fn is_bond_enabled() -> bool { Self::get_bond().is_some_and(|cfg| cfg.enabled) } diff --git a/src/main.rs b/src/main.rs index 23e7850a..c87a6eba 100644 --- a/src/main.rs +++ b/src/main.rs @@ -140,6 +140,14 @@ async fn main() -> Result<()> { } } + // Resubscribe to any in-flight anti-abuse bond hold invoices so a + // restart doesn't strand a taker who paid the bond just before the + // daemon went down. Inert when the feature is disabled. + let bond_pool = get_db_pool(); + if let Err(e) = app::bond::resubscribe_active_bonds(&bond_pool).await { + tracing::warn!("Failed to resubscribe active bonds: {e}"); + } + // Start RPC server if enabled if RpcServer::is_enabled() { let rpc_server = RpcServer::new(); diff --git a/src/scheduler.rs b/src/scheduler.rs index ef972136..1502c6bd 100644 --- a/src/scheduler.rs +++ b/src/scheduler.rs @@ -1,3 +1,4 @@ +use crate::app::bond; use crate::app::context::AppContext; use crate::app::dev_fee::run_dev_fee_cycle; use crate::app::release::do_payment; @@ -410,8 +411,21 @@ async fn job_cancel_orders(ctx: AppContext) { &order_updated.id, new_status ); + let order_id = order_updated.id; // update order on db let _ = order_updated.update(pool).await; + // Phase 1: scheduler-driven cancels (waiting-state + // timeouts) always release the bond. Slashing on + // timeout lands in Phase 4 — and crucially MUST + // gate on the cause being a real timeout, not a + // user-driven cancel beforehand. + if let Err(e) = bond::release_bonds_for_order(pool, order_id).await { + tracing::warn!( + "scheduler: bond release failed for {}: {}", + order_id, + e + ); + } } } } From ccd91e72a4a5012f612300ac2cee3b5a450bb977 Mon Sep 17 00:00:00 2001 From: grunch Date: Sat, 25 Apr 2026 15:59:39 -0300 Subject: [PATCH 2/9] fix(bond): address phase 1 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tightens the Phase 1 taker-bond plumbing in response to PR feedback: - take_buy_action / take_sell_action no longer mask DB errors from find_active_bonds_for_order as PendingOrderExists; the original ServiceError::DbAccessError now propagates and PendingOrderExists is reserved for the actual "an active bond already exists" branch. - BondState::is_terminal / is_active helpers replace the stringified matches!(...) guards in release_bond and on_bond_invoice_canceled, so control flow no longer depends on Display values. State strings are parsed once via FromStr at the entry of each handler. - on_bond_invoice_accepted now drives the Requested → Locked transition through a conditional UPDATE WHERE state = 'requested', and only resumes the take continuation when the row count is 1. This closes the race where two concurrent subscriber firings (e.g. LND reconnect + restart-time resubscriber) both passed the in-memory guard and resume_take_after_bond ran twice. - Active-bond SQL no longer hardcodes the 'requested' / 'locked' literals; the queries bind BondState::Requested / Locked Display strings, mirroring find_bonds_by_state. - New release_bonds_for_order_or_warn helper consolidates the best-effort "release + log on err" boilerplate that was duplicated at eight call sites across release.rs, cancel.rs, admin_cancel.rs, admin_settle.rs, and scheduler.rs into a single line per site. - cargo fmt cleanup across the touched files. Deferred (not in Phase 1 scope): - Distinguishing transient vs permanent LND errors during bond release: Phase 1 explicitly chose "always release on exit" as the ironclad guarantee. A typed gRPC error layer is a cross-cutting refactor better paired with the Phase 3 payout retry work, where it belongs. - Reusing one LndConnector across release_bonds_for_order's batch: per-order batches are 1–2 bonds in practice; not worth the API churn at Phase 1. - Tracking subscriber JoinHandles for cancellation: mirrors the existing crate::util::invoice_subscribe pattern. Cleaning up subscriber lifecycle is a separate concern that should land for trade and bond invoices together. Tests: cargo test (246 passed), cargo clippy --all-targets --all-features (clean), cargo fmt --all --check (clean). --- src/app/admin_cancel.rs | 8 +--- src/app/admin_settle.rs | 8 +--- src/app/bond/db.rs | 20 ++++++--- src/app/bond/flow.rs | 95 +++++++++++++++++++++++++++-------------- src/app/bond/mod.rs | 4 +- src/app/bond/types.rs | 38 +++++++++++++++++ src/app/cancel.rs | 32 ++------------ src/app/release.rs | 4 +- src/app/take_buy.rs | 4 +- src/app/take_sell.rs | 4 +- src/scheduler.rs | 13 +++--- 11 files changed, 132 insertions(+), 98 deletions(-) diff --git a/src/app/admin_cancel.rs b/src/app/admin_cancel.rs index bbdc1ee1..b0f73783 100644 --- a/src/app/admin_cancel.rs +++ b/src/app/admin_cancel.rs @@ -202,13 +202,7 @@ pub async fn admin_cancel_action( // Phase 1: admin cancellation always releases any taker bond. The // dispute slash path lands in Phase 2. - if let Err(e) = bond::release_bonds_for_order(pool, order.id).await { - tracing::warn!( - "admin_cancel: bond release failed for {}: {}", - order.id, - e - ); - } + bond::release_bonds_for_order_or_warn(pool, order.id, "admin_cancel").await; Ok(()) } diff --git a/src/app/admin_settle.rs b/src/app/admin_settle.rs index 70dfaa41..c63bd53f 100644 --- a/src/app/admin_settle.rs +++ b/src/app/admin_settle.rs @@ -191,13 +191,7 @@ pub async fn admin_settle_action( } // Phase 1: admin-settled disputes always release any taker bond. // Slashing on lost dispute lands in Phase 2. - if let Err(e) = bond::release_bonds_for_order(pool, order_updated.id).await { - tracing::warn!( - "admin_settle: bond release failed for {}: {}", - order_updated.id, - e - ); - } + bond::release_bonds_for_order_or_warn(pool, order_updated.id, "admin_settle").await; let _ = do_payment(ctx, order_updated, request_id).await; diff --git a/src/app/bond/db.rs b/src/app/bond/db.rs index c4196b2d..7505413c 100644 --- a/src/app/bond/db.rs +++ b/src/app/bond/db.rs @@ -83,12 +83,14 @@ pub async fn find_bond_by_hash( pub async fn find_active_bonds( pool: &Pool, ) -> Result, mostro_core::error::MostroError> { - sqlx::query_as::<_, Bond>( - "SELECT * FROM bonds WHERE state IN ('requested', 'locked') ORDER BY created_at ASC", - ) - .fetch_all(pool) - .await - .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string()))) + let requested = BondState::Requested.to_string(); + let locked = BondState::Locked.to_string(); + sqlx::query_as::<_, Bond>("SELECT * FROM bonds WHERE state IN (?, ?) ORDER BY created_at ASC") + .bind(requested) + .bind(locked) + .fetch_all(pool) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string()))) } /// List the still-outstanding bonds attached to a single order. Phase 1 @@ -98,12 +100,16 @@ pub async fn find_active_bonds_for_order( pool: &Pool, order_id: Uuid, ) -> Result, mostro_core::error::MostroError> { + let requested = BondState::Requested.to_string(); + let locked = BondState::Locked.to_string(); sqlx::query_as::<_, Bond>( "SELECT * FROM bonds \ - WHERE order_id = ? AND state IN ('requested', 'locked') \ + WHERE order_id = ? AND state IN (?, ?) \ ORDER BY created_at ASC", ) .bind(order_id) + .bind(requested) + .bind(locked) .fetch_all(pool) .await .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string()))) diff --git a/src/app/bond/flow.rs b/src/app/bond/flow.rs index 63fdf80b..998f3bc8 100644 --- a/src/app/bond/flow.rs +++ b/src/app/bond/flow.rs @@ -23,6 +23,7 @@ //! status will land alongside the corresponding `mostro-core` release in a //! later phase. +use std::str::FromStr; use std::sync::Arc; use chrono::Utc; @@ -43,9 +44,7 @@ use crate::util::{ bytes_to_string, enqueue_order_msg, get_keys, set_waiting_invoice_status, show_hold_invoice, }; -use super::db::{ - create_bond, find_active_bonds, find_active_bonds_for_order, find_bond_by_hash, -}; +use super::db::{create_bond, find_active_bonds, find_active_bonds_for_order, find_bond_by_hash}; use super::math::compute_bond_amount; use super::model::Bond; use super::types::{BondRole, BondState}; @@ -90,12 +89,7 @@ pub async fn request_taker_bond( .await .map_err(|e| MostroInternalErr(ServiceError::HoldInvoiceError(e.to_string())))?; - let mut bond = Bond::new_requested( - order.id, - taker_pubkey.to_string(), - BondRole::Taker, - amount, - ); + let mut bond = Bond::new_requested(order.id, taker_pubkey.to_string(), BondRole::Taker, amount); bond.hash = Some(bytes_to_string(&hash)); bond.preimage = Some(bytes_to_string(&preimage)); bond.payment_request = Some(invoice_resp.payment_request.clone()); @@ -158,10 +152,16 @@ pub async fn request_taker_bond( /// more than one path (e.g. cooperative cancel after the LND subscriber /// already saw `Canceled`). pub async fn release_bond(pool: &Pool, bond: &Bond) -> Result<(), MostroError> { - if matches!( - bond.state.as_str(), - "released" | "slashed" | "failed" - ) { + // Parse `state` once into the enum so callers don't depend on the + // `Display` form for control flow (and a malformed value short- + // circuits to "no-op" instead of falsely transitioning). + let state = BondState::from_str(&bond.state).map_err(|e| { + MostroInternalErr(ServiceError::UnexpectedError(format!( + "Bond {} has unparseable state {:?}: {}", + bond.id, bond.state, e + ))) + })?; + if state.is_terminal() { return Ok(()); } @@ -228,6 +228,23 @@ pub async fn release_bonds_for_order( Ok(()) } +/// Best-effort release helper for the Phase 1 exit paths. +/// +/// Every order-exit flow (release, cancel, admin actions, scheduler +/// timeouts) wants the same shape: try to release the bond, and on +/// failure log a warning tagged with the call site — never propagate. +/// Centralising the pattern keeps each call site to a single line and +/// guarantees consistent log structure for operators. +pub async fn release_bonds_for_order_or_warn( + pool: &Pool, + order_id: Uuid, + context: &'static str, +) { + if let Err(e) = release_bonds_for_order(pool, order_id).await { + warn!("{context}: bond release failed for {}: {}", order_id, e); + } +} + /// Spawn the LND subscriber for a bond hold invoice. The subscriber /// transitions the bond row through `Locked` / `Released` based on the /// invoice state and, on `Locked`, resumes the original take flow. @@ -311,7 +328,7 @@ async fn on_bond_invoice_accepted( pool: &Pool, request_id: Option, ) -> Result<(), MostroError> { - let mut bond = match find_bond_by_hash(pool, hash).await? { + let bond = match find_bond_by_hash(pool, hash).await? { Some(b) => b, None => { warn!("Bond invoice accepted for unknown hash {hash}"); @@ -319,25 +336,38 @@ async fn on_bond_invoice_accepted( } }; - if bond.state == BondState::Locked.to_string() { - // Subscriber may emit Accepted more than once on reconnect; idempotent. - return Ok(()); - } - if bond.state != BondState::Requested.to_string() { - warn!( - "Bond {} accepted but state was {} — ignoring", - bond.id, bond.state - ); + // Concurrent subscriber firings (LND can emit Accepted more than once + // on reconnect, and the restart-time resubscriber re-attaches another + // listener) must not both run the take continuation. The conditional + // UPDATE is the single point of synchronisation: only the row that + // actually wins the `requested` → `locked` race continues here. + let now = Utc::now().timestamp(); + let result = + sqlx::query("UPDATE bonds SET state = ?, locked_at = ? WHERE id = ? AND state = ?") + .bind(BondState::Locked.to_string()) + .bind(now) + .bind(bond.id) + .bind(BondState::Requested.to_string()) + .execute(pool) + .await + .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))?; + + if result.rows_affected() == 0 { + // Either another subscriber already locked the bond (idempotent + // — nothing to do), or the row moved to a non-Requested state + // through a concurrent release path (also fine: the take won't + // resume on a released bond). Log only when surprising. + if !matches!(bond.state.as_str(), s if s == BondState::Requested.to_string() + || s == BondState::Locked.to_string()) + { + warn!( + "Bond {} accepted but state was {} — ignoring", + bond.id, bond.state + ); + } return Ok(()); } - bond.state = BondState::Locked.to_string(); - bond.locked_at = Some(Utc::now().timestamp()); - let bond = bond - .update(pool) - .await - .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))?; - info!("Bond {} locked for order {}", bond.id, bond.order_id); let order = Order::by_id(pool, bond.order_id) @@ -368,7 +398,10 @@ async fn on_bond_invoice_canceled(hash: &str, pool: &Pool) -> Result<(), None => return Ok(()), }; - if matches!(bond.state.as_str(), "released" | "slashed" | "failed") { + if BondState::from_str(&bond.state) + .map(|s| s.is_terminal()) + .unwrap_or(false) + { return Ok(()); } diff --git a/src/app/bond/mod.rs b/src/app/bond/mod.rs index 5f256cba..ec9bdf4e 100644 --- a/src/app/bond/mod.rs +++ b/src/app/bond/mod.rs @@ -16,8 +16,8 @@ pub mod model; pub mod types; pub use flow::{ - release_bond, release_bonds_for_order, request_taker_bond, resubscribe_active_bonds, - taker_bond_required, + release_bond, release_bonds_for_order, release_bonds_for_order_or_warn, request_taker_bond, + resubscribe_active_bonds, taker_bond_required, }; pub use math::compute_bond_amount; pub use model::Bond; diff --git a/src/app/bond/types.rs b/src/app/bond/types.rs index 6a75baa6..ade328bf 100644 --- a/src/app/bond/types.rs +++ b/src/app/bond/types.rs @@ -65,6 +65,29 @@ pub enum BondState { Failed, } +impl BondState { + /// True for states that should not be transitioned out of by Phase 1 + /// release paths: the bond is already done with from the operator's + /// perspective. Used so call sites don't have to enumerate the trio + /// of `Released | Slashed | Failed` manually (and so the daemon + /// doesn't grow to depend on the [`Display`] string form for control + /// flow). + pub fn is_terminal(self) -> bool { + matches!( + self, + BondState::Released | BondState::Slashed | BondState::Failed + ) + } + + /// True for states that still have an outstanding LND HTLC and are + /// candidates for release / slash. Inverse of [`BondState::is_terminal`] + /// minus `PendingPayout`, which is owned by the Phase 3 payout job + /// rather than the release flow. + pub fn is_active(self) -> bool { + matches!(self, BondState::Requested | BondState::Locked) + } +} + impl fmt::Display for BondState { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let s = match self { @@ -185,4 +208,19 @@ mod tests { assert!(BondState::from_str("in-progress").is_err()); assert!(BondSlashReason::from_str("whatever").is_err()); } + + #[test] + fn terminal_and_active_helpers() { + for s in [BondState::Released, BondState::Slashed, BondState::Failed] { + assert!(s.is_terminal(), "{s} should be terminal"); + assert!(!s.is_active(), "{s} should not be active"); + } + for s in [BondState::Requested, BondState::Locked] { + assert!(s.is_active(), "{s} should be active"); + assert!(!s.is_terminal(), "{s} should not be terminal"); + } + // `PendingPayout` is neither: it's owned by the payout job. + assert!(!BondState::PendingPayout.is_terminal()); + assert!(!BondState::PendingPayout.is_active()); + } } diff --git a/src/app/cancel.rs b/src/app/cancel.rs index eedbbced..0a1d92a2 100644 --- a/src/app/cancel.rs +++ b/src/app/cancel.rs @@ -148,13 +148,7 @@ async fn cancel_cooperative_execution_step_2( // Phase 1: cooperative cancel always releases any taker bond. The // dispute slash path lands in Phase 2. - if let Err(e) = bond::release_bonds_for_order(pool, order.id).await { - tracing::warn!( - "cooperative_cancel: bond release failed for {}: {}", - order.id, - e - ); - } + bond::release_bonds_for_order_or_warn(pool, order.id, "cooperative_cancel").await; Ok(()) } @@ -262,13 +256,7 @@ async fn cancel_order_by_taker( // Phase 1: the taker cancelled before activating the trade — always // release the bond. Slashing for timeout-based cancels is Phase 4. - if let Err(e) = bond::release_bonds_for_order(pool, order_updated.id).await { - tracing::warn!( - "taker_cancel: bond release failed for {}: {}", - order_updated.id, - e - ); - } + bond::release_bonds_for_order_or_warn(pool, order_updated.id, "taker_cancel").await; Ok(()) } @@ -323,13 +311,7 @@ async fn cancel_order_by_maker( // Phase 1: maker cancelled before the trade went active — release any // taker bond that had already been locked. - if let Err(e) = bond::release_bonds_for_order(pool, order.id).await { - tracing::warn!( - "maker_cancel: bond release failed for {}: {}", - order.id, - e - ); - } + bond::release_bonds_for_order_or_warn(pool, order.id, "maker_cancel").await; Ok(()) } @@ -376,13 +358,7 @@ async fn cancel_pending_order_from_maker( // Phase 1: a maker cancelling a still-Pending order may be racing // with a taker who just locked a bond. Release any active bond so // the taker is made whole. - if let Err(e) = bond::release_bonds_for_order(pool, order.id).await { - tracing::warn!( - "pending_maker_cancel: bond release failed for {}: {}", - order.id, - e - ); - } + bond::release_bonds_for_order_or_warn(pool, order.id, "pending_maker_cancel").await; Ok(()) } diff --git a/src/app/release.rs b/src/app/release.rs index 90490d80..3ce43f57 100644 --- a/src/app/release.rs +++ b/src/app/release.rs @@ -273,9 +273,7 @@ pub async fn release_action( // hand off to the buyer payment task. Slashing is intentionally not // wired in yet — that's Phase 2+. A failed bond release is logged but // does not block trade finalization. - if let Err(e) = bond::release_bonds_for_order(pool, order.id).await { - tracing::warn!("release_action: bond release failed for {}: {}", order.id, e); - } + bond::release_bonds_for_order_or_warn(pool, order.id, "release_action").await; // Finally we try to pay buyer's invoice let _ = do_payment(ctx, order, request_id).await; diff --git a/src/app/take_buy.rs b/src/app/take_buy.rs index 970bb966..f8ea1759 100644 --- a/src/app/take_buy.rs +++ b/src/app/take_buy.rs @@ -104,9 +104,7 @@ pub async fn take_buy_action( // Defend against concurrent takes for the same order: if another // taker already has an active bond on this order, the second take // must back off rather than create a duplicate bond row. - let existing = find_active_bonds_for_order(pool, order.id) - .await - .map_err(|_| MostroCantDo(CantDoReason::PendingOrderExists))?; + let existing = find_active_bonds_for_order(pool, order.id).await?; if !existing.is_empty() { return Err(MostroCantDo(CantDoReason::PendingOrderExists)); } diff --git a/src/app/take_sell.rs b/src/app/take_sell.rs index 2475eae9..8541de8d 100644 --- a/src/app/take_sell.rs +++ b/src/app/take_sell.rs @@ -131,9 +131,7 @@ pub async fn take_sell_action( // `bond::flow::resume_take_after_bond` resumes the trade once the // bond locks. if bond::taker_bond_required() { - let existing = find_active_bonds_for_order(pool, order.id) - .await - .map_err(|_| MostroCantDo(CantDoReason::PendingOrderExists))?; + let existing = find_active_bonds_for_order(pool, order.id).await?; if !existing.is_empty() { return Err(MostroCantDo(CantDoReason::PendingOrderExists)); } diff --git a/src/scheduler.rs b/src/scheduler.rs index 1502c6bd..d841148a 100644 --- a/src/scheduler.rs +++ b/src/scheduler.rs @@ -419,13 +419,12 @@ async fn job_cancel_orders(ctx: AppContext) { // timeout lands in Phase 4 — and crucially MUST // gate on the cause being a real timeout, not a // user-driven cancel beforehand. - if let Err(e) = bond::release_bonds_for_order(pool, order_id).await { - tracing::warn!( - "scheduler: bond release failed for {}: {}", - order_id, - e - ); - } + bond::release_bonds_for_order_or_warn( + pool, + order_id, + "scheduler_timeout", + ) + .await; } } } From d34b22b0c1f157f365c53419a51d55c104787048 Mon Sep 17 00:00:00 2001 From: grunch Date: Sat, 25 Apr 2026 16:09:17 -0300 Subject: [PATCH 3/9] fix(bond): retry-on-resume-failure and feature-flag-independent release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three coordinated fixes for the Phase 1 taker bond flow: 1. on_bond_invoice_accepted no longer skips the take continuation forever after a transient resume failure. Previously the conditional UPDATE that drives Requested → Locked was the only path that could call resume_take_after_bond — if that resume failed (LND/DB/Nostr blip while creating the trade hold invoice), every subsequent Accepted firing saw `state = locked` and returned without retrying, leaving the order stuck in Pending with the bond still locked. The handler now decouples the state transition from the resume retry: after the conditional UPDATE, it re-reads the bond and the order, and runs resume_take_after_bond whenever the bond is Locked AND the order is still Pending. Multiple firings are idempotent because the order moves out of Pending on first success. 2. Resume is gated on order.status == Pending. If a maker / admin / scheduler cancellation wins the race after the bond invoice was paid but before the handler ran, the order has already been transitioned away from Pending; reactivating it would issue inconsistent PayInvoice / WaitingBuyerInvoice messages on a canceled trade. The new guard logs the skip and returns Ok cleanly. 3. release_bonds_for_order and resubscribe_active_bonds no longer short-circuit on Settings::is_bond_enabled(). An operator who toggles the feature off (or removes the [anti_abuse_bond] block) with bonds already locked in LND would otherwise strand taker funds — the cancel/release/admin/scheduler exits, and the restart resubscriber, would all skip release for those rows. Both functions now run on the *DB state*: a single indexed SELECT is the cost of safety, and it returns zero rows for nodes that never enabled the feature. The release_bonds_for_order_no_op_when_disabled unit test is replaced by release_bonds_for_order_runs_regardless_of_feature_flag, which asserts the new invariant directly. Tests: cargo test (246 passed), cargo clippy --all-targets --all-features (clean), cargo fmt --all --check (clean). --- src/app/bond/flow.rs | 126 +++++++++++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 45 deletions(-) diff --git a/src/app/bond/flow.rs b/src/app/bond/flow.rs index 998f3bc8..f3fe213a 100644 --- a/src/app/bond/flow.rs +++ b/src/app/bond/flow.rs @@ -206,19 +206,22 @@ pub async fn release_bond(pool: &Pool, bond: &Bond) -> Result<(), Mostro /// Release every active (`Requested` or `Locked`) bond attached to an /// order. Designed to be the **single** call sites use from each exit -/// path — the gate, the lookup, and the per-row release are all here. +/// path — the lookup and the per-row release are both here. /// -/// Returns `Ok(())` when the feature is disabled or no active bonds -/// exist; never fails the caller for individual bond failures (those -/// are logged and the loop continues). +/// **Not gated on `Settings::is_bond_enabled()`.** An operator can flip +/// the feature off (or remove the `[anti_abuse_bond]` block) while bonds +/// are still locked in LND from a prior enabled period; gating release +/// on the *current* config would strand those funds. The lookup is a +/// single indexed SELECT that returns zero rows for nodes that never +/// enabled the feature, so the cost of always running is negligible. +/// +/// Returns `Ok(())` when no active bonds exist; never fails the caller +/// for individual bond failures (those are logged and the loop +/// continues). pub async fn release_bonds_for_order( pool: &Pool, order_id: Uuid, ) -> Result<(), MostroError> { - if !Settings::is_bond_enabled() { - return Ok(()); - } - let bonds = find_active_bonds_for_order(pool, order_id).await?; for bond in bonds.iter() { if let Err(e) = release_bond(pool, bond).await { @@ -295,10 +298,13 @@ pub async fn bond_invoice_subscribe( /// Restart hook: re-subscribe to every bond that was still active when /// the daemon stopped. Called from `main` next to `find_held_invoices`. +/// +/// Like [`release_bonds_for_order`], this is **not gated on the current +/// feature flag**: bonds locked under a previous enabled period must +/// continue to flow through state transitions even after an operator +/// disables the feature, otherwise their hold invoices stay stranded +/// in LND. pub async fn resubscribe_active_bonds(pool: &Arc>) -> Result<(), MostroError> { - if !Settings::is_bond_enabled() { - return Ok(()); - } let bonds = find_active_bonds(pool.as_ref()).await?; for bond in bonds.into_iter() { if let Some(hash) = bond.hash.as_ref() { @@ -320,9 +326,19 @@ pub async fn resubscribe_active_bonds(pool: &Arc>) -> Result<(), Mo /// Subscriber callback for `InvoiceState::Accepted`: bond is locked. /// -/// Transitions the row to `Locked` and resumes the original take flow -/// (creates the trade hold invoice / asks the buyer for a payout -/// invoice, depending on the side). +/// Drives the bond from `Requested` to `Locked` via a conditional +/// `UPDATE`, then — independently of whether *this* call won the +/// transition — attempts to resume the take flow if (a) the bond is +/// `Locked` and (b) the order is still `Pending`. +/// +/// Decoupling the bond-state transition from the resume retry means a +/// transient resume failure (LND/DB/Nostr blip while creating the +/// trade hold invoice) doesn't leave the order stuck: the next +/// `Accepted` delivery — or the restart resubscriber — will retry the +/// continuation as long as both conditions still hold. Conversely, +/// if the order has moved out of `Pending` (resume already succeeded, +/// or maker/admin canceled in the meantime) the resume is skipped, so +/// we never reactivate a canceled order. async fn on_bond_invoice_accepted( hash: &str, pool: &Pool, @@ -336,11 +352,10 @@ async fn on_bond_invoice_accepted( } }; - // Concurrent subscriber firings (LND can emit Accepted more than once - // on reconnect, and the restart-time resubscriber re-attaches another - // listener) must not both run the take continuation. The conditional - // UPDATE is the single point of synchronisation: only the row that - // actually wins the `requested` → `locked` race continues here. + // Atomic Requested → Locked transition. Concurrent firings — LND + // reconnect, the restart-time resubscriber, etc. — race here and + // exactly one wins; the others see `rows_affected == 0` and fall + // through to the post-transition retry logic below. let now = Utc::now().timestamp(); let result = sqlx::query("UPDATE bonds SET state = ?, locked_at = ? WHERE id = ? AND state = ?") @@ -352,34 +367,54 @@ async fn on_bond_invoice_accepted( .await .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))?; - if result.rows_affected() == 0 { - // Either another subscriber already locked the bond (idempotent - // — nothing to do), or the row moved to a non-Requested state - // through a concurrent release path (also fine: the take won't - // resume on a released bond). Log only when surprising. - if !matches!(bond.state.as_str(), s if s == BondState::Requested.to_string() - || s == BondState::Locked.to_string()) - { + if result.rows_affected() == 1 { + info!("Bond {} locked for order {}", bond.id, bond.order_id); + } + + // Re-read the bond so a concurrent release (Locked → Released) is + // visible: in that case there's nothing to resume. + let current = match find_bond_by_hash(pool, hash).await? { + Some(b) => b, + None => return Ok(()), + }; + let current_state = match BondState::from_str(¤t.state) { + Ok(s) => s, + Err(e) => { warn!( - "Bond {} accepted but state was {} — ignoring", - bond.id, bond.state + "Bond {} has unparseable state {:?}: {} — skipping resume", + current.id, current.state, e ); + return Ok(()); } + }; + if current_state != BondState::Locked { + // Released / Slashed / Failed / Requested-still-but-something- + // else-is-wrong: nothing to resume on this firing. return Ok(()); } - info!("Bond {} locked for order {}", bond.id, bond.order_id); - - let order = Order::by_id(pool, bond.order_id) + let order = Order::by_id(pool, current.order_id) .await .map_err(|e| MostroInternalErr(ServiceError::DbAccessError(e.to_string())))? .ok_or_else(|| { MostroInternalErr(ServiceError::UnexpectedError(format!( "Bond {} references missing order {}", - bond.id, bond.order_id + current.id, current.order_id ))) })?; + // Defense-in-depth: only drive the take forward when the order is + // still in the pre-trade state we left it in. If it's already moved + // on (resume succeeded on a previous firing) or been canceled by a + // maker / admin / scheduler path, do not re-trigger the take. + if order.status != Status::Pending.to_string() { + info!( + "Bond {} accepted but order {} is in status {} — skipping resume", + current.id, order.id, order.status + ); + return Ok(()); + } + let my_keys = get_keys()?; resume_take_after_bond(pool, order, &my_keys, request_id).await } @@ -551,26 +586,27 @@ mod tests { } #[tokio::test] - async fn release_bonds_for_order_no_op_when_disabled() { + async fn release_bonds_for_order_runs_regardless_of_feature_flag() { // No `[anti_abuse_bond]` block in test settings → feature off. - // Function must succeed without touching LND or DB beyond a - // configuration check. + // Even so, an outstanding bond row from a prior enabled period + // MUST still be released; otherwise an operator who toggles the + // feature off strands taker funds in LND. let pool = setup_pool().await; - // Even with active bonds in the DB, the gate keeps us out. let order_id = Uuid::new_v4(); insert_order(&pool, order_id).await; - let _ = create_bond(&pool, make_bond(order_id, BondState::Locked)) - .await - .unwrap(); + // Use a hash-less Requested bond so release_bond skips LND in + // the unit-test harness (no Lightning settings configured). + let mut bond = make_bond(order_id, BondState::Requested); + bond.hash = None; + create_bond(&pool, bond).await.unwrap(); - // Settings::is_bond_enabled() reads MOSTRO_CONFIG which is unset - // in the unit-test harness → returns false. Verify the call path - // is a clean no-op. release_bonds_for_order(&pool, order_id).await.unwrap(); - // Bond untouched. let active = find_active_bonds_for_order(&pool, order_id).await.unwrap(); - assert_eq!(active.len(), 1); + assert!( + active.is_empty(), + "bond must be released even with feature disabled" + ); } #[tokio::test] From df222282acfb6d398017999b3a3eec960da06a04 Mon Sep 17 00:00:00 2001 From: grunch Date: Sat, 25 Apr 2026 16:21:24 -0300 Subject: [PATCH 4/9] fix(bond): never mark Released when LND cancel fails transiently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous release_bond unconditionally marked the bond Released after warn-logging any cancel_hold_invoice error. For benign races (invoice already canceled / not found) that's correct, but for transient errors — LND unreachable, server returning Unavailable / DeadlineExceeded / Internal — it stranded the taker's funds: the HTLC stayed encumbered at LND while the DB row dropped out of find_active_bonds*'s "active" set (state IN ('requested', 'locked')), so no future code path retried the cancel. This change: - Modifies LndConnector::cancel_hold_invoice to preserve the gRPC Status code in the error message via a stable `code=` prefix (instead of stringifying the whole tonic::Status verbatim, which loses structure to the call site). - Adds a CancelOutcome classifier in bond/flow.rs that maps a cancel error to AlreadyDone (NotFound / AlreadyExists / known LND messages indicating the invoice is already gone) vs. Transient (everything else, including LND unreachable). Anything we can't classify confidently maps to Transient — the safer side is to delay cleanup, never to falsely report a release on an HTLC LND still has. - release_bond now only marks Released when the cancel landed (Ok) or was AlreadyDone. On Transient outcomes — including LndConnector::new() failing because LND is unreachable — it emits a structured warn (bond_id, order_id, outcome) and propagates the error, leaving the bond in its current active state. The recovery path for a left-active bond is implicit and already wired: the LND subscriber spawned by bond_invoice_subscribe (and re-attached by resubscribe_active_bonds on daemon restart) catches the eventual InvoiceState::Canceled — emitted when the hold invoice's CLTV expires and LND auto-cancels — and on_bond_invoice_canceled then marks the bond Released. The structured warn gives operators a visible signal in the meantime. New unit tests cover the classifier across gRPC-code prefixes, LND-specific message patterns, and the conservative Transient fallback for unrecognised errors. Tests: cargo test (249 passed), cargo clippy --all-targets --all-features (clean), cargo fmt --all --check (clean). --- src/app/bond/flow.rs | 179 +++++++++++++++++++++++++++++++++++++++---- src/lightning/mod.rs | 20 +++-- 2 files changed, 176 insertions(+), 23 deletions(-) diff --git a/src/app/bond/flow.rs b/src/app/bond/flow.rs index f3fe213a..2f8c06ef 100644 --- a/src/app/bond/flow.rs +++ b/src/app/bond/flow.rs @@ -143,14 +143,81 @@ pub async fn request_taker_bond( Ok(bond) } -/// Release a single bond: cancel the hold invoice (best-effort) and -/// transition the row to `Released`. +/// Outcome of a `cancel_hold_invoice` attempt against LND, classified +/// from the structured gRPC error so the caller can decide whether the +/// HTLC is verifiably no longer encumbered. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum CancelOutcome { + /// The cancel landed at LND (or didn't need to: the invoice was + /// already canceled / never existed). The HTLC, if there ever was + /// one, is no longer encumbered. Safe to mark `Released`. + AlreadyDone, + /// Transport / server error from LND, including LND being + /// unreachable. The HTLC **may still be encumbered**. Leave the bond + /// in its current active state so a future code path retries. + Transient, +} + +/// Classify the error returned by `LndConnector::cancel_hold_invoice`. /// -/// **Idempotent.** A bond already in a terminal state (`Released`, -/// `Slashed`, `Failed`) is a no-op. This matters because Phase 1 wires -/// release into every exit, and the same bond can plausibly be hit by -/// more than one path (e.g. cooperative cancel after the LND subscriber -/// already saw `Canceled`). +/// We rely on the `code=` prefix `cancel_hold_invoice` +/// embeds, plus message-text patterns LND emits when an invoice is +/// already canceled / unknown (those typically come back as +/// `code=Unknown` with a recognisable message, so message inspection is +/// load-bearing — not just defensive). +/// +/// Anything we can't classify confidently maps to `Transient`: the +/// safer side is to delay cleanup until the next exit path or CLTV +/// expiry, never to falsely report a release on an HTLC LND still has. +fn classify_cancel_error(err: &MostroError) -> CancelOutcome { + let s = err.to_string().to_lowercase(); + + // gRPC codes that mean the cancel was idempotent / target wasn't there. + if s.contains("code=notfound") || s.contains("code=alreadyexists") { + return CancelOutcome::AlreadyDone; + } + // LND-specific message patterns that come back under `code=Unknown`. + if s.contains("already cancelled") + || s.contains("already canceled") + || s.contains("unable to locate invoice") + || s.contains("invoice not found") + || s.contains("no such invoice") + { + return CancelOutcome::AlreadyDone; + } + // Everything else — Unavailable, DeadlineExceeded, transport errors, + // unexpected Internal, codes we don't recognise — is conservatively + // treated as transient. The bond stays active and gets retried on + // the next exit path / CLTV expiry / daemon restart. + CancelOutcome::Transient +} + +/// Release a single bond: cancel the hold invoice and transition the +/// row to `Released` **only if** the HTLC is verifiably no longer +/// encumbered. +/// +/// **Idempotent for terminal states.** A bond already in `Released`, +/// `Slashed`, or `Failed` is a no-op. +/// +/// **Safety contract for transient LND failures.** When +/// `cancel_hold_invoice` fails with a transport / server error (LND +/// unreachable, deadline exceeded, etc.), the bond is left in its +/// current active state and the error is propagated to the caller. +/// Marking `Released` here would drop the bond out of +/// `find_active_bonds*` (which filters on `state IN ('requested', +/// 'locked')`), stranding the taker's funds in LND with no retry path +/// — the [issue raised in the Phase 1 review](#). +/// +/// The recovery path for a left-active bond is implicit: +/// - The LND subscriber spawned by `bond_invoice_subscribe` (and +/// re-attached by `resubscribe_active_bonds` on restart) catches the +/// eventual `InvoiceState::Canceled` — emitted either when LND +/// recovers and we retry, or when the hold invoice's CLTV expires +/// and LND auto-cancels — and `on_bond_invoice_canceled` then marks +/// the bond `Released`. +/// - Operators see a structured `warn` event with `bond_id`, `order_id`, +/// and the classified outcome so they can spot and intervene if a +/// bond stays stuck. pub async fn release_bond(pool: &Pool, bond: &Bond) -> Result<(), MostroError> { // Parse `state` once into the enum so callers don't depend on the // `Display` form for control flow (and a malformed value short- @@ -169,20 +236,44 @@ pub async fn release_bond(pool: &Pool, bond: &Bond) -> Result<(), Mostro match LndConnector::new().await { Ok(mut ln) => { if let Err(e) = ln.cancel_hold_invoice(hash).await { - // Hold invoice already canceled / unknown to LND is the - // common race with the subscriber; we still want the row - // to land in `Released` so callers can move on. - warn!( - "Bond {} cancel_hold_invoice failed: {} — marking Released anyway", - bond.id, e - ); + match classify_cancel_error(&e) { + CancelOutcome::AlreadyDone => { + // Common race with the subscriber, or the + // invoice was never created in the first place + // (request_taker_bond bailed before the row got + // a hash). HTLC is verifiably gone — fall + // through to mark Released. + info!( + bond_id = %bond.id, + order_id = %bond.order_id, + "cancel_hold_invoice reports already-done ({}); marking Released", + e + ); + } + CancelOutcome::Transient => { + warn!( + bond_id = %bond.id, + order_id = %bond.order_id, + outcome = "transient", + "cancel_hold_invoice failed transiently ({}); leaving bond {} for retry", + e, bond.state + ); + return Err(e); + } + } } } Err(e) => { + // LND unreachable: definitionally transient. Don't pretend + // the HTLC is gone. warn!( - "Bond {} could not connect to LND for cancel: {} — marking Released anyway", - bond.id, e + bond_id = %bond.id, + order_id = %bond.order_id, + outcome = "transient", + "could not connect to LND for cancel ({}); leaving bond {} for retry", + e, bond.state ); + return Err(e); } } } @@ -635,4 +726,60 @@ mod tests { // of an `[anti_abuse_bond]` block. assert!(!taker_bond_required()); } + + fn ln_err(msg: &str) -> MostroError { + MostroInternalErr(ServiceError::LnNodeError(msg.to_string())) + } + + #[test] + fn classify_already_done_by_grpc_code() { + // The `code=NotFound` / `code=AlreadyExists` prefix is what the + // updated `cancel_hold_invoice` emits for benign outcomes. + assert_eq!( + classify_cancel_error(&ln_err("code=NotFound message=...")), + CancelOutcome::AlreadyDone + ); + assert_eq!( + classify_cancel_error(&ln_err("code=AlreadyExists message=duplicate")), + CancelOutcome::AlreadyDone + ); + } + + #[test] + fn classify_already_done_by_lnd_message() { + // LND returns these under `code=Unknown`, so message inspection + // is load-bearing. + for msg in [ + "code=Unknown message=invoice with that hash already cancelled", + "code=Unknown message=invoice with that hash already canceled", + "code=Unknown message=unable to locate invoice", + "code=Unknown message=invoice not found for hash", + "code=Unknown message=no such invoice", + ] { + assert_eq!( + classify_cancel_error(&ln_err(msg)), + CancelOutcome::AlreadyDone, + "expected AlreadyDone for: {msg}" + ); + } + } + + #[test] + fn classify_transient_for_transport_and_unknown() { + // Transport / server errors must NOT be treated as already-done: + // marking Released here would strand the HTLC. + for msg in [ + "code=Unavailable message=connection refused", + "code=DeadlineExceeded message=timeout", + "code=Internal message=server crashed", + "code=Unknown message=something we don't recognise", + "transport error", + ] { + assert_eq!( + classify_cancel_error(&ln_err(msg)), + CancelOutcome::Transient, + "expected Transient for: {msg}" + ); + } + } } diff --git a/src/lightning/mod.rs b/src/lightning/mod.rs index 89c3ece4..46aea702 100644 --- a/src/lightning/mod.rs +++ b/src/lightning/mod.rs @@ -149,16 +149,22 @@ impl LndConnector { let payment_hash = FromHex::from_hex(hash).expect("Wrong payment hash"); let cancel_message = CancelInvoiceMsg { payment_hash }; - let cancel = self - .client - .invoices() - .cancel_invoice(cancel_message) - .await - .map_err(|e| e.to_string()); + let cancel = self.client.invoices().cancel_invoice(cancel_message).await; match cancel { Ok(cancel) => Ok(cancel.into_inner()), - Err(e) => Err(MostroInternalErr(ServiceError::LnNodeError(e.to_string()))), + Err(status) => { + // Preserve the gRPC code in the error string with a stable + // `code=` prefix. Bond release uses this to tell + // benign "already canceled / not found" outcomes from + // transient transport failures so it can avoid marking a + // bond Released while the HTLC may still be encumbered. + Err(MostroInternalErr(ServiceError::LnNodeError(format!( + "code={:?} message={}", + status.code(), + status.message() + )))) + } } } From 2bba33aab3263569c8c1037eafc6df370abb7a41 Mon Sep 17 00:00:00 2001 From: grunch Date: Wed, 29 Apr 2026 10:56:02 -0300 Subject: [PATCH 5/9] fix(bond): allow taker self-cancel and re-takes before bond paid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two interrelated taker-side gaps in the Phase 1 bond flow: - A taker who took a Pending order but hadn't paid the bond yet got IsNotYourOrder when trying to cancel, because cancel_action routed every Pending cancel through the maker-only path. cancel_action now recognises a bonded taker (event.sender matches an active bond's pubkey) and routes through cancel_order_by_taker, releasing the bond and republishing the order. - take_buy / take_sell rejected any new take while another taker had an active bond, letting a malicious user keep an order Pending by taking and never paying. The new bond::supersede_prior_taker_bonds helper releases still-Requested prior bonds (notifying the prior taker) so re-takes succeed; only a Locked prior bond — meaning the trade is already committed — still rejects with PendingOrderExists. For API-priced orders, the market-price recomputation also resets the local quote when superseding, so re-takes don't inherit a stale amount. take_sell additionally always assigns buyer_invoice from this take's payment_request, preventing a prior taker's invoice from leaking into the new take. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/ANTI_ABUSE_BOND.md | 17 +++++- src/app/bond/flow.rs | 130 ++++++++++++++++++++++++++++++++++++++++ src/app/bond/mod.rs | 2 +- src/app/cancel.rs | 90 +++++++++++++++++++++++++++- src/app/take_buy.rs | 31 ++++++---- src/app/take_sell.rs | 34 +++++++---- 6 files changed, 278 insertions(+), 26 deletions(-) diff --git a/docs/ANTI_ABUSE_BOND.md b/docs/ANTI_ABUSE_BOND.md index dad4aaef..1da5f684 100644 --- a/docs/ANTI_ABUSE_BOND.md +++ b/docs/ANTI_ABUSE_BOND.md @@ -203,9 +203,20 @@ to users. maker-side, including pending-order maker cancels), `admin_settle_action`, `admin_cancel_action`, and `scheduler::job_cancel_orders`. Slashing hooks are intentionally absent and land in Phase 2+. -- A guard in `take_buy_action` / `take_sell_action` rejects a take with - `PendingOrderExists` when an active bond row already exists for the - order, preventing duplicate bonds when two takers race. +- `take_buy_action` / `take_sell_action` call + `bond::supersede_prior_taker_bonds` before persisting the new take. + A still-`Requested` prior bond is released (its hold invoice + cancelled) so a malicious taker can't keep an order in `Pending` + by abandoning the bond invoice — anyone may re-take and the first + bond to lock wins. A `Locked` prior bond is treated as committed + and the new take is rejected with `PendingOrderExists`. +- `cancel_action` recognises a bonded taker as authorised to cancel a + still-`Pending` order: when `event.sender` matches the `pubkey` of an + active bond on the order, the cancel routes through the existing + `cancel_order_by_taker` flow (release the bond, clear the taker + fields, republish the order). This lets a taker who took the order + but no longer wants to proceed back out cleanly instead of getting + `IsNotYourOrder`. - On daemon startup, `bond::resubscribe_active_bonds` re-attaches LND invoice subscribers for any bond rows still in `Requested` / `Locked`, so a restart never strands a taker who paid the bond just before the diff --git a/src/app/bond/flow.rs b/src/app/bond/flow.rs index 2f8c06ef..d1846642 100644 --- a/src/app/bond/flow.rs +++ b/src/app/bond/flow.rs @@ -339,6 +339,68 @@ pub async fn release_bonds_for_order_or_warn( } } +/// Make the order takeable again when a prior taker hasn't paid their +/// bond yet. +/// +/// Called from `take_buy_action` / `take_sell_action` when a new taker +/// arrives for an order that still carries an active bond from a +/// previous take. Without this, a malicious user could keep an order +/// in `Pending` indefinitely by taking it and never paying — see issue +/// in (taker can DoS orders by +/// abandoning the bond invoice). +/// +/// Behaviour: +/// - If any active bond is `Locked`, the trade is already committed to +/// that taker — return `PendingOrderExists`. +/// - Otherwise (only `Requested` bonds), cancel each prior bond's hold +/// invoice and notify the prior taker that their take was +/// superseded. +/// +/// Bonds belonging to the new taker themselves (same `pubkey`) are +/// also released here: a single user retaking should also start with a +/// fresh bond invoice. +pub async fn supersede_prior_taker_bonds( + pool: &Pool, + order_id: Uuid, + new_taker: PublicKey, +) -> Result { + let existing = find_active_bonds_for_order(pool, order_id).await?; + if existing + .iter() + .any(|b| b.state == BondState::Locked.to_string()) + { + return Err(MostroCantDo(CantDoReason::PendingOrderExists)); + } + let new_taker_str = new_taker.to_string(); + let mut superseded = 0usize; + for bond in existing.iter() { + if let Err(e) = release_bond(pool, bond).await { + warn!( + bond_id = %bond.id, + order_id = %bond.order_id, + "supersede_prior_taker_bonds: failed to release bond ({}); aborting new take", + e + ); + return Err(e); + } + superseded += 1; + if bond.pubkey != new_taker_str { + if let Ok(prior_pk) = PublicKey::from_str(&bond.pubkey) { + enqueue_order_msg( + None, + Some(order_id), + Action::Canceled, + None, + prior_pk, + None, + ) + .await; + } + } + } + Ok(superseded) +} + /// Spawn the LND subscriber for a bond hold invoice. The subscriber /// transitions the bond row through `Locked` / `Released` based on the /// invoice state and, on `Locked`, resumes the original take flow. @@ -727,6 +789,74 @@ mod tests { assert!(!taker_bond_required()); } + fn fake_pubkey() -> PublicKey { + Keys::generate().public_key() + } + + #[tokio::test] + async fn supersede_releases_prior_requested_bond() { + // A new taker arriving while a prior taker has only requested + // (not yet locked) the bond must release the prior bond and + // return the count, so the order is takeable again. + let pool = setup_pool().await; + let order_id = Uuid::new_v4(); + insert_order(&pool, order_id).await; + let mut prior = make_bond(order_id, BondState::Requested); + prior.hash = None; // skip LND in the unit-test harness + prior.pubkey = "b".repeat(64); + create_bond(&pool, prior).await.unwrap(); + + let count = supersede_prior_taker_bonds(&pool, order_id, fake_pubkey()) + .await + .expect("supersede should succeed"); + assert_eq!(count, 1); + + let active = find_active_bonds_for_order(&pool, order_id).await.unwrap(); + assert!( + active.is_empty(), + "prior Requested bond must be released so the order is takeable again" + ); + } + + #[tokio::test] + async fn supersede_rejects_when_prior_bond_locked() { + // A `Locked` prior bond means the trade is already committed to + // that taker; a new take must back off rather than strand the + // committed taker's funds. + let pool = setup_pool().await; + let order_id = Uuid::new_v4(); + insert_order(&pool, order_id).await; + let mut prior = make_bond(order_id, BondState::Locked); + prior.hash = None; + prior.pubkey = "b".repeat(64); + create_bond(&pool, prior).await.unwrap(); + + let result = supersede_prior_taker_bonds(&pool, order_id, fake_pubkey()).await; + assert!(matches!( + result, + Err(MostroCantDo(CantDoReason::PendingOrderExists)) + )); + + // The locked bond must NOT be touched. + let active = find_active_bonds_for_order(&pool, order_id).await.unwrap(); + assert_eq!(active.len(), 1); + assert_eq!(active[0].state, BondState::Locked.to_string()); + } + + #[tokio::test] + async fn supersede_with_no_prior_bonds_is_noop() { + // The first taker must not be charged a "supersede" cost: when + // no prior bonds exist, the helper returns 0 cleanly. + let pool = setup_pool().await; + let order_id = Uuid::new_v4(); + insert_order(&pool, order_id).await; + + let count = supersede_prior_taker_bonds(&pool, order_id, fake_pubkey()) + .await + .unwrap(); + assert_eq!(count, 0); + } + fn ln_err(msg: &str) -> MostroError { MostroInternalErr(ServiceError::LnNodeError(msg.to_string())) } diff --git a/src/app/bond/mod.rs b/src/app/bond/mod.rs index ec9bdf4e..d9ceeb3a 100644 --- a/src/app/bond/mod.rs +++ b/src/app/bond/mod.rs @@ -17,7 +17,7 @@ pub mod types; pub use flow::{ release_bond, release_bonds_for_order, release_bonds_for_order_or_warn, request_taker_bond, - resubscribe_active_bonds, taker_bond_required, + resubscribe_active_bonds, supersede_prior_taker_bonds, taker_bond_required, }; pub use math::compute_bond_amount; pub use model::Bond; diff --git a/src/app/cancel.rs b/src/app/cancel.rs index 0a1d92a2..9426016d 100644 --- a/src/app/cancel.rs +++ b/src/app/cancel.rs @@ -399,8 +399,34 @@ async fn cancel_action_generic( // Pending: maker can revert to Canceled state and republish without cooperative steps. if order.check_status(Status::Pending).is_ok() { - cancel_pending_order_from_maker(pool, event, &mut order, my_keys, request_id).await?; - return Ok(()); + if order.sent_from_maker(event.sender).is_ok() { + cancel_pending_order_from_maker(pool, event, &mut order, my_keys, request_id).await?; + return Ok(()); + } + // Phase 1: a taker who took the order but hasn't paid the bond + // yet leaves the order in `Pending` (the taker fields are + // populated; the bond row sits in `Requested`). Allow that taker + // to back out — release the bond, clear the taker fields, and + // republish the order so other takers can take it. We identify + // the bonded taker by matching `event.sender` against an active + // bond's `pubkey`. + let active_bonds = + crate::app::bond::db::find_active_bonds_for_order(pool, order.id).await?; + let sender_str = event.sender.to_string(); + if active_bonds.iter().any(|b| b.pubkey == sender_str) { + cancel_order_by_taker( + pool, + event, + order, + my_keys, + request_id, + ln_client, + event.sender, + ) + .await?; + return Ok(()); + } + return Err(MostroCantDo(CantDoReason::IsNotYourOrder)); } // Do the appropriate cancellation flow based on the order status @@ -646,4 +672,64 @@ mod tests { Err(MostroCantDo(CantDoReason::IsNotYourOrder)) )); } + + /// Phase 1 fix: a taker who took a `Pending` order but hasn't paid + /// the bond yet must be able to cancel and back out, even though + /// the order is still `Pending`. Before this fix, `cancel_action` + /// routed every cancel on a `Pending` order through the maker path + /// and returned `IsNotYourOrder` for the bonded taker. + /// + /// We assert the routing change at the *decision* layer: an active + /// bond row whose `pubkey` matches `event.sender` switches the + /// cancel out of the maker-only path. The full cancel side-effects + /// (`update_order_event`, `notify_creator`) reach into globals + /// (`get_db_pool`, etc.) that aren't initialized in unit tests, so + /// they're covered by integration tests rather than asserted here. + #[tokio::test] + async fn pending_taker_with_active_bond_is_not_routed_as_intruder() { + use crate::app::bond::db::find_active_bonds_for_order; + let pool = Arc::new(SqlitePool::connect("sqlite::memory:").await.unwrap()); + sqlx::migrate!("./migrations") + .run(pool.as_ref()) + .await + .unwrap(); + + let maker = Keys::generate().public_key(); + let taker = Keys::generate().public_key(); + let order = create_pending_order(maker, taker) + .create(pool.as_ref()) + .await + .unwrap(); + + // Insert a Requested bond row whose pubkey matches the taker's. + let mut bond = crate::app::bond::Bond::new_requested( + order.id, + taker.to_string(), + crate::app::bond::BondRole::Taker, + 1_500, + ); + bond.hash = None; + bond.create(pool.as_ref()).await.unwrap(); + + // Sanity: the helper finds the bond by sender match — this is + // exactly the predicate `cancel_action_generic` uses to decide + // whether to route to the taker-cancel path. + let active = find_active_bonds_for_order(pool.as_ref(), order.id) + .await + .unwrap(); + let sender_str = taker.to_string(); + assert!( + active.iter().any(|b| b.pubkey == sender_str), + "the taker must be recognised as a bonded sender" + ); + + // And the intruder (non-maker, no bond) must still NOT match, + // so the routing falls through to `IsNotYourOrder`. + let intruder = Keys::generate().public_key(); + let intruder_str = intruder.to_string(); + assert!( + !active.iter().any(|b| b.pubkey == intruder_str), + "an intruder with no bond row must not be routed to the taker-cancel path" + ); + } } diff --git a/src/app/take_buy.rs b/src/app/take_buy.rs index f8ea1759..6c4335d5 100644 --- a/src/app/take_buy.rs +++ b/src/app/take_buy.rs @@ -1,5 +1,5 @@ use crate::app::bond; -use crate::app::bond::db::find_active_bonds_for_order; +use crate::app::bond::supersede_prior_taker_bonds; use crate::app::context::AppContext; use crate::util::{ get_dev_fee, get_fiat_amount_requested, get_market_amount_and_fee, get_order, show_hold_invoice, @@ -43,6 +43,25 @@ pub async fn take_buy_action( .not_sent_from_maker(event.sender) .map_err(MostroCantDo)?; + // Anti-abuse bond (Phase 1): release any prior taker's still- + // `Requested` bond before this take proceeds, so a malicious user + // can't block the order by abandoning the bond invoice. A `Locked` + // prior bond means the trade is already committed and the helper + // returns `PendingOrderExists`. Done before the market-price + // recomputation below so re-takes of API-priced orders see a fresh + // quote. + let bond_required = bond::taker_bond_required(); + let superseded = if bond_required { + supersede_prior_taker_bonds(pool, order.id, event.sender).await? + } else { + 0 + }; + if superseded > 0 && order.price_from_api { + order.amount = 0; + order.fee = 0; + order.dev_fee = 0; + } + // Get the fiat amount requested by the user for range orders if let Some(am) = get_fiat_amount_requested(&order, &msg) { order.fiat_amount = am; @@ -100,15 +119,7 @@ pub async fn take_buy_action( // (status stays `Pending`) and request the bond. The trade hold // invoice is created later — once the bond locks — by the bond // subscriber's continuation in `bond::flow::resume_take_after_bond`. - if bond::taker_bond_required() { - // Defend against concurrent takes for the same order: if another - // taker already has an active bond on this order, the second take - // must back off rather than create a duplicate bond row. - let existing = find_active_bonds_for_order(pool, order.id).await?; - if !existing.is_empty() { - return Err(MostroCantDo(CantDoReason::PendingOrderExists)); - } - + if bond_required { // Stash the seller (taker) trade pubkey so the post-bond // continuation can resume `show_hold_invoice` with the same // arguments the legacy path would have used. diff --git a/src/app/take_sell.rs b/src/app/take_sell.rs index 8541de8d..9a566dea 100644 --- a/src/app/take_sell.rs +++ b/src/app/take_sell.rs @@ -1,5 +1,5 @@ use crate::app::bond; -use crate::app::bond::db::find_active_bonds_for_order; +use crate::app::bond::supersede_prior_taker_bonds; use crate::app::context::AppContext; use crate::db::{buyer_has_pending_order, update_user_trade_index}; use crate::util::{ @@ -66,6 +66,23 @@ pub async fn take_sell_action( .not_sent_from_maker(event.sender) .map_err(MostroCantDo)?; + // Anti-abuse bond (Phase 1): release any prior taker's still- + // `Requested` bond before this take proceeds. A `Locked` prior bond + // means the trade is already committed and the helper returns + // `PendingOrderExists`. Done before the market-price recomputation + // below so re-takes of API-priced orders see a fresh quote. + let bond_required = bond::taker_bond_required(); + let superseded = if bond_required { + supersede_prior_taker_bonds(pool, order.id, event.sender).await? + } else { + 0 + }; + if superseded > 0 && order.price_from_api { + order.amount = 0; + order.fee = 0; + order.dev_fee = 0; + } + // Get seller pubkey let seller_pubkey = order.get_seller_pubkey().map_err(MostroInternalErr)?; @@ -130,15 +147,12 @@ pub async fn take_sell_action( // invoice if the taker provided one, and request the taker's bond. // `bond::flow::resume_take_after_bond` resumes the trade once the // bond locks. - if bond::taker_bond_required() { - let existing = find_active_bonds_for_order(pool, order.id).await?; - if !existing.is_empty() { - return Err(MostroCantDo(CantDoReason::PendingOrderExists)); - } - - if let Some(invoice) = payment_request.as_ref() { - order.buyer_invoice = Some(invoice.clone()); - } + if bond_required { + // Always set `buyer_invoice` from this take's `payment_request` + // (including back to `None`): otherwise a prior taker's invoice + // would persist into this take when the new taker did not + // provide one. + order.buyer_invoice = payment_request.clone(); let persisted = order .update(pool) From f2e9f0e2c88209bb126a262ae9af35875aa59534 Mon Sep 17 00:00:00 2001 From: grunch Date: Wed, 29 Apr 2026 11:07:29 -0300 Subject: [PATCH 6/9] fix(bond): notify bonded taker on maker-cancel and release on Pending expiry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Phase 1 release-on-exit gaps the previous taker-cancel patch didn't cover: - cancel_pending_order_from_maker only DM'd the maker. A taker who had already requested or locked a bond on the still-Pending order got no signal even though the bond was being released — now we iterate find_active_bonds_for_order and Canceled-notify each bonded pubkey other than the cancelling maker. - job_expire_pending_older_orders flipped the order to Expired but never released attached bonds, so a slow taker whose bond was outstanding when the maker order's expires_at elapsed had the HTLC sit in LND until CLTV. Mirror the scheduler_timeout hook on the expiry path with release_bonds_for_order_or_warn(..., "pending_expiry"). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/app/cancel.rs | 27 +++++++++++++++++++++++++-- src/scheduler.rs | 15 +++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/app/cancel.rs b/src/app/cancel.rs index 9426016d..15527c31 100644 --- a/src/app/cancel.rs +++ b/src/app/cancel.rs @@ -356,8 +356,31 @@ async fn cancel_pending_order_from_maker( ) .await; // Phase 1: a maker cancelling a still-Pending order may be racing - // with a taker who just locked a bond. Release any active bond so - // the taker is made whole. + // with a taker who just locked (or only requested) a bond. Notify + // every bonded taker so they don't keep waiting on a cancelled + // order, and release the bonds so they're made whole. The bond + // pubkey is the canonical source of who has a stake here — for a + // fresh Pending order with no taker yet, the lookup returns empty + // and this is a no-op. + if let Ok(active_bonds) = + crate::app::bond::db::find_active_bonds_for_order(pool, order.id).await + { + for active in active_bonds.iter() { + if let Ok(taker_pk) = PublicKey::from_str(&active.pubkey) { + if taker_pk != event.sender { + enqueue_order_msg( + None, + Some(order.id), + Action::Canceled, + None, + taker_pk, + None, + ) + .await; + } + } + } + } bond::release_bonds_for_order_or_warn(pool, order.id, "pending_maker_cancel").await; Ok(()) } diff --git a/src/scheduler.rs b/src/scheduler.rs index d841148a..8e803fc8 100644 --- a/src/scheduler.rs +++ b/src/scheduler.rs @@ -461,7 +461,22 @@ async fn job_expire_pending_older_orders(ctx: AppContext) { if let Ok(order_updated) = crate::util::update_order_event(&keys, Status::Expired, order).await { + let order_id = order_updated.id; let _ = order_updated.update(pool).await; + // Phase 1: a Pending order may be carrying a + // still-active taker bond (Phase 1 keeps the + // order in `Pending` while the taker funds the + // bond hold invoice). Without this hook the + // bond stays in `Requested`/`Locked` and the + // HTLC sits in LND until CLTV expiry — Phase 1 + // promises "always release" on every exit + // path, expiry included. + bond::release_bonds_for_order_or_warn( + pool, + order_id, + "pending_expiry", + ) + .await; } } } From 3e0f4bb63f6dafd6c3d0effe56b63ea0f97d7b49 Mon Sep 17 00:00:00 2001 From: grunch Date: Wed, 29 Apr 2026 11:15:30 -0300 Subject: [PATCH 7/9] fix(bond): subscribe before emitting bond invoice; gate bond release on persist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two robustness fixes from the Phase 1 follow-up review: - request_taker_bond armed the LND subscriber AFTER shipping the bolt11. A taker who paid before the subscriber attached would slip through the Accepted event and the take would never resume, with the HTLC unwinding only at CLTV. Reorder so the subscriber is attached first; on subscribe failure roll back the persisted bond via release_bond and propagate the error so the take fails cleanly rather than leaving a Requested row with no listener. - Both scheduler jobs (job_cancel_orders and job_expire_pending_older_orders) ignored the Result from order_updated.update(pool) and unconditionally released bonds. On a persist failure the bond would be released while the order stayed in its pre-cancel/Pending status, so the next tick would re-publish a cancel with no funds left to refund. Match against the persist result and only release on Ok; on Err warn and let the next tick retry. CLTV remains the eventual safety net. Note: release_bonds_for_order_or_warn intentionally swallows transient LND errors and warns — the function name and doc spell out the best-effort contract, and release_bond's doc walks through the subscriber+CLTV recovery path. Strict callers can use release_bonds_for_order directly. Not changing. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/app/bond/flow.rs | 24 +++++++++++- src/scheduler.rs | 89 ++++++++++++++++++++++++++++++-------------- 2 files changed, 83 insertions(+), 30 deletions(-) diff --git a/src/app/bond/flow.rs b/src/app/bond/flow.rs index d1846642..67c62f15 100644 --- a/src/app/bond/flow.rs +++ b/src/app/bond/flow.rs @@ -124,6 +124,28 @@ pub async fn request_taker_bond( None, ); + // Arm the LND subscriber BEFORE shipping the bolt11 to the taker. + // If we emit the invoice first and the taker pays before the + // subscriber is attached, we miss the `Accepted` event and the + // take never resumes (the HTLC eventually unwinds via CLTV but + // the trade is dead in the meantime). On subscribe failure, undo + // the persisted bond so we don't strand a `Requested` row with + // no listener — and keep the invoice unsent so the taker can + // retry the take cleanly. + if let Err(e) = bond_invoice_subscribe(hash, request_id).await { + warn!( + bond_id = %bond.id, + order_id = %bond.order_id, + "request_taker_bond: subscribe failed ({}); rolling back bond row", + e + ); + // Best-effort cleanup: cancel the LND hold invoice and mark + // the row Released. Mirrors the "always release" exit path + // contract. + let _ = release_bond(pool, &bond).await; + return Err(e); + } + enqueue_order_msg( request_id, Some(order.id), @@ -138,8 +160,6 @@ pub async fn request_taker_bond( ) .await; - bond_invoice_subscribe(hash, request_id).await?; - Ok(bond) } diff --git a/src/scheduler.rs b/src/scheduler.rs index 8e803fc8..8c3f1553 100644 --- a/src/scheduler.rs +++ b/src/scheduler.rs @@ -412,19 +412,36 @@ async fn job_cancel_orders(ctx: AppContext) { new_status ); let order_id = order_updated.id; - // update order on db - let _ = order_updated.update(pool).await; - // Phase 1: scheduler-driven cancels (waiting-state - // timeouts) always release the bond. Slashing on - // timeout lands in Phase 4 — and crucially MUST - // gate on the cause being a real timeout, not a - // user-driven cancel beforehand. - bond::release_bonds_for_order_or_warn( - pool, - order_id, - "scheduler_timeout", - ) - .await; + // Persist the new status before releasing + // bonds: a release on top of a failed + // persist would leave the order in its + // pre-cancel status while the bond is + // gone, so the next scheduler tick keeps + // re-publishing cancels with no funds to + // refund. Skip release on persist failure + // — the next tick retries persist, and + // the bond's CLTV expiry is the safety + // net. + match order_updated.update(pool).await { + Ok(_) => { + // Phase 1: scheduler-driven cancels + // (waiting-state timeouts) always + // release the bond. Slashing on + // timeout lands in Phase 4. + bond::release_bonds_for_order_or_warn( + pool, + order_id, + "scheduler_timeout", + ) + .await; + } + Err(e) => { + tracing::warn!( + "scheduler_timeout: persist failed for order {} ({}); skipping bond release — will retry next tick", + order_id, e + ); + } + } } } } @@ -462,21 +479,37 @@ async fn job_expire_pending_older_orders(ctx: AppContext) { crate::util::update_order_event(&keys, Status::Expired, order).await { let order_id = order_updated.id; - let _ = order_updated.update(pool).await; - // Phase 1: a Pending order may be carrying a - // still-active taker bond (Phase 1 keeps the - // order in `Pending` while the taker funds the - // bond hold invoice). Without this hook the - // bond stays in `Requested`/`Locked` and the - // HTLC sits in LND until CLTV expiry — Phase 1 - // promises "always release" on every exit - // path, expiry included. - bond::release_bonds_for_order_or_warn( - pool, - order_id, - "pending_expiry", - ) - .await; + // Same gate as the timeout job: only release + // bonds when the Expired status was actually + // persisted. On persist failure the next tick + // reprocesses the still-Pending order; CLTV + // expiry is the eventual safety net. + match order_updated.update(pool).await { + Ok(_) => { + // Phase 1: a Pending order may be + // carrying a still-active taker bond + // (Phase 1 keeps the order in `Pending` + // while the taker funds the bond hold + // invoice). Without this hook the bond + // stays in `Requested`/`Locked` and + // the HTLC sits in LND until CLTV + // expiry — Phase 1 promises "always + // release" on every exit path, + // expiry included. + bond::release_bonds_for_order_or_warn( + pool, + order_id, + "pending_expiry", + ) + .await; + } + Err(e) => { + tracing::warn!( + "pending_expiry: persist failed for order {} ({}); skipping bond release — will retry next tick", + order_id, e + ); + } + } } } } From 6172c4eedaeff265ea06fcc4eebc7d89f46cb9ed Mon Sep 17 00:00:00 2001 From: grunch Date: Wed, 29 Apr 2026 11:18:57 -0300 Subject: [PATCH 8/9] fix format --- src/app/bond/flow.rs | 11 ++--------- src/app/cancel.rs | 11 ++--------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/src/app/bond/flow.rs b/src/app/bond/flow.rs index 67c62f15..2bec8398 100644 --- a/src/app/bond/flow.rs +++ b/src/app/bond/flow.rs @@ -406,15 +406,8 @@ pub async fn supersede_prior_taker_bonds( superseded += 1; if bond.pubkey != new_taker_str { if let Ok(prior_pk) = PublicKey::from_str(&bond.pubkey) { - enqueue_order_msg( - None, - Some(order_id), - Action::Canceled, - None, - prior_pk, - None, - ) - .await; + enqueue_order_msg(None, Some(order_id), Action::Canceled, None, prior_pk, None) + .await; } } } diff --git a/src/app/cancel.rs b/src/app/cancel.rs index 15527c31..55fe0148 100644 --- a/src/app/cancel.rs +++ b/src/app/cancel.rs @@ -368,15 +368,8 @@ async fn cancel_pending_order_from_maker( for active in active_bonds.iter() { if let Ok(taker_pk) = PublicKey::from_str(&active.pubkey) { if taker_pk != event.sender { - enqueue_order_msg( - None, - Some(order.id), - Action::Canceled, - None, - taker_pk, - None, - ) - .await; + enqueue_order_msg(None, Some(order.id), Action::Canceled, None, taker_pk, None) + .await; } } } From 479179c972b957a5a2401bdb886dbd5e1db7afb4 Mon Sep 17 00:00:00 2001 From: grunch Date: Wed, 29 Apr 2026 18:00:34 -0300 Subject: [PATCH 9/9] fix(bond): harden taker bond cleanup and DB-error handling in cancel - cancel_order_by_taker: split into a wrapper that always runs release_bonds_for_order_or_warn after the fallible inner flow, so early `?` exits can no longer strand a Requested/Locked bond row. - cancel_pending_order_from_maker: replace the silent if-let with a match that warns with order.id when the bond lookup fails, so bonded-taker notification failures stop being invisible. - cancel_action (Pending branch): stop propagating bond-lookup DB errors that block a legitimate taker self-cancel; log and fall back to the in-memory taker pubkey on the order before returning IsNotYourOrder. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/app/cancel.rs | 104 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 86 insertions(+), 18 deletions(-) diff --git a/src/app/cancel.rs b/src/app/cancel.rs index 55fe0148..105ffba1 100644 --- a/src/app/cancel.rs +++ b/src/app/cancel.rs @@ -9,7 +9,7 @@ use nostr_sdk::prelude::*; use sqlx::{Pool, Sqlite}; use sqlx_crud::Crud; use std::str::FromStr; -use tracing::info; +use tracing::{info, warn}; pub trait CancelLightning { fn cancel_hold_invoice<'a>( @@ -203,7 +203,38 @@ async fn cancel_cooperative_execution_step_1( /// - Notify the taker /// - Reset quote-derived amounts (if any) and return order to initial state /// - Notify the maker/creator that the order is republished +/// +/// The trailing bond release runs on every exit path (including early +/// `?` returns from the inner steps) so a mid-flow failure can never +/// leave a `Requested`/`Locked` bond row stranded for this taker. +/// `release_bonds_for_order_or_warn` is idempotent — it only acts on +/// rows still in active states — so re-entering it after the inner +/// flow already released is a safe no-op. async fn cancel_order_by_taker( + pool: &Pool, + event: &UnwrappedMessage, + order: Order, + my_keys: &Keys, + request_id: Option, + ln_client: &mut L, + taker_pubkey: PublicKey, +) -> Result<(), MostroError> { + let order_id = order.id; + let result = cancel_order_by_taker_inner( + pool, + event, + order, + my_keys, + request_id, + ln_client, + taker_pubkey, + ) + .await; + bond::release_bonds_for_order_or_warn(pool, order_id, "taker_cancel").await; + result +} + +async fn cancel_order_by_taker_inner( pool: &Pool, event: &UnwrappedMessage, mut order: Order, @@ -254,10 +285,6 @@ async fn cancel_order_by_taker( // Notify the creator about the republished order after the taker-side cancellation flow completes notify_creator(&order_updated, request_id).await?; - // Phase 1: the taker cancelled before activating the trade — always - // release the bond. Slashing for timeout-based cancels is Phase 4. - bond::release_bonds_for_order_or_warn(pool, order_updated.id, "taker_cancel").await; - Ok(()) } @@ -362,17 +389,35 @@ async fn cancel_pending_order_from_maker( // pubkey is the canonical source of who has a stake here — for a // fresh Pending order with no taker yet, the lookup returns empty // and this is a no-op. - if let Ok(active_bonds) = - crate::app::bond::db::find_active_bonds_for_order(pool, order.id).await - { - for active in active_bonds.iter() { - if let Ok(taker_pk) = PublicKey::from_str(&active.pubkey) { - if taker_pk != event.sender { - enqueue_order_msg(None, Some(order.id), Action::Canceled, None, taker_pk, None) + // + // A DB error here must not silently drop bonded-taker notifications: + // log it with order context, then still run the bond release below + // so cleanup happens regardless of the lookup outcome. + match crate::app::bond::db::find_active_bonds_for_order(pool, order.id).await { + Ok(active_bonds) => { + for active in active_bonds.iter() { + if let Ok(taker_pk) = PublicKey::from_str(&active.pubkey) { + if taker_pk != event.sender { + enqueue_order_msg( + None, + Some(order.id), + Action::Canceled, + None, + taker_pk, + None, + ) .await; + } } } } + Err(err) => { + warn!( + order_id = %order.id, + "pending_maker_cancel: failed to look up active bonds for taker notification: {}", + err + ); + } } bond::release_bonds_for_order_or_warn(pool, order.id, "pending_maker_cancel").await; Ok(()) @@ -423,13 +468,36 @@ async fn cancel_action_generic( // yet leaves the order in `Pending` (the taker fields are // populated; the bond row sits in `Requested`). Allow that taker // to back out — release the bond, clear the taker fields, and - // republish the order so other takers can take it. We identify - // the bonded taker by matching `event.sender` against an active - // bond's `pubkey`. - let active_bonds = - crate::app::bond::db::find_active_bonds_for_order(pool, order.id).await?; + // republish the order so other takers can take it. + // + // Prefer matching `event.sender` against an active bond row + // (the canonical signal). A transient DB failure on that + // lookup must not block a legitimate taker self-cancel: log + // it and fall back to the in-memory taker pubkey on the order + // (whichever side does not match `creator_pubkey`). For a + // fresh Pending order with no taker yet, neither check + // matches and we still return `IsNotYourOrder`. let sender_str = event.sender.to_string(); - if active_bonds.iter().any(|b| b.pubkey == sender_str) { + let bond_match = + match crate::app::bond::db::find_active_bonds_for_order(pool, order.id).await { + Ok(active_bonds) => active_bonds.iter().any(|b| b.pubkey == sender_str), + Err(e) => { + warn!( + order_id = %order.id, + "cancel: bond lookup failed for pending taker self-cancel: {}", e + ); + false + } + }; + let order_taker_match = order + .buyer_pubkey + .as_deref() + .is_some_and(|p| p == sender_str && p != order.creator_pubkey) + || order + .seller_pubkey + .as_deref() + .is_some_and(|p| p == sender_str && p != order.creator_pubkey); + if bond_match || order_taker_match { cancel_order_by_taker( pool, event,