@@ -20,6 +20,7 @@ use crate::message_queue::MessageQueue;
 use crate::persist::{
 	LIQUIDITY_MANAGER_PERSISTENCE_PRIMARY_NAMESPACE, LSPS5_SERVICE_PERSISTENCE_SECONDARY_NAMESPACE,
 };
+use crate::prelude::hash_map::Entry;
 use crate::prelude::*;
 use crate::sync::{Arc, Mutex, RwLock, RwLockWriteGuard};
 use crate::utils::time::TimeProvider;
@@ -35,6 +36,7 @@ use lightning::util::persist::KVStore;
 use lightning::util::ser::Writeable;
 
 use core::ops::Deref;
+use core::sync::atomic::{AtomicUsize, Ordering};
 use core::time::Duration;
 
 use alloc::string::String;
@@ -139,6 +141,7 @@ where
 	node_signer: NS,
 	kv_store: K,
 	last_pruning: Mutex<Option<LSPSDateTime>>,
+	persistence_in_flight: AtomicUsize,
 }
 
 impl<CM: Deref, NS: Deref, K: Deref + Clone, TP: Deref> LSPS5ServiceHandler<CM, NS, K, TP>
@@ -166,6 +169,7 @@ where
 			node_signer,
 			kv_store,
 			last_pruning: Mutex::new(None),
+			persistence_in_flight: AtomicUsize::new(0),
 		}
 	}
 
@@ -220,6 +224,8 @@ where
 
 		let key = counterparty_node_id.to_string();
 
+		// Begin the write with the `per_peer_state` write lock held to avoid racing with
+		// potentially-in-flight `persist` calls writing state for the same peer.
 		self.kv_store.write(
 			LIQUIDITY_MANAGER_PERSISTENCE_PRIMARY_NAMESPACE,
 			LSPS5_SERVICE_PERSISTENCE_SECONDARY_NAMESPACE,
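The comment introduced above captures a subtle ordering requirement: the KVStore write is started while the `per_peer_state` write lock is held, so writes for the same peer are handed to the store in lock order, but it is only awaited afterwards so the lock is not held across an `.await`. A rough standalone sketch of that idea, using a made-up `ordered_write` helper and a plain `String` state in place of the handler's real types:

// Hypothetical sketch only: start the store write under the lock, await it
// after the guard is dropped. `ordered_write` is not part of the real API.
use core::future::Future;
use std::sync::RwLock;

async fn ordered_write<F, Fut>(state: &RwLock<String>, start_write: F) -> Result<(), ()>
where
	F: FnOnce(String) -> Fut,
	Fut: Future<Output = Result<(), ()>>,
{
	let write_fut = {
		// Snapshot the state and hand it to the store while the lock is held...
		let guard = state.write().unwrap();
		start_write(guard.clone())
	};
	// ...then complete the write without holding the lock across the `.await`.
	write_fut.await
}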
@@ -242,38 +248,80 @@ where
 		// TODO: We should eventually persist in parallel, however, when we do, we probably want to
 		// introduce some batching to upper-bound the number of requests inflight at any given
 		// time.
-		let mut need_remove = Vec::new();
-		let mut need_persist = Vec::new();
-		{
-			let mut outer_state_lock = self.per_peer_state.write().unwrap();
-			self.check_prune_stale_webhooks(&mut outer_state_lock);
-
-			outer_state_lock.retain(|client_id, peer_state| {
-				let is_prunable = peer_state.is_prunable();
-				let has_open_channel = self.client_has_open_channel(client_id);
-				if is_prunable && !has_open_channel {
-					need_remove.push(*client_id);
-				} else if peer_state.needs_persist {
-					need_persist.push(*client_id);
-				}
-				!is_prunable || has_open_channel
-			});
-		};
 
-		for counterparty_node_id in need_persist.into_iter() {
-			debug_assert!(!need_remove.contains(&counterparty_node_id));
-			self.persist_peer_state(counterparty_node_id).await?;
+		if self.persistence_in_flight.fetch_add(1, Ordering::AcqRel) > 0 {
+			// If we're not the first event processor to get here, just return early; the increment
+			// we just did will be treated as "go around again" at the end.
+			return Ok(());
 		}
 
-		for counterparty_node_id in need_remove {
-			let key = counterparty_node_id.to_string();
-			self.kv_store
-				.remove(
-					LIQUIDITY_MANAGER_PERSISTENCE_PRIMARY_NAMESPACE,
-					LSPS5_SERVICE_PERSISTENCE_SECONDARY_NAMESPACE,
-					&key,
-				)
-				.await?;
+		loop {
+			let mut need_remove = Vec::new();
+			let mut need_persist = Vec::new();
+
+			self.check_prune_stale_webhooks(&mut self.per_peer_state.write().unwrap());
+			{
+				let outer_state_lock = self.per_peer_state.read().unwrap();
+
+				for (client_id, peer_state) in outer_state_lock.iter() {
+					let is_prunable = peer_state.is_prunable();
+					let has_open_channel = self.client_has_open_channel(client_id);
+					if is_prunable && !has_open_channel {
+						need_remove.push(*client_id);
+					} else if peer_state.needs_persist {
+						need_persist.push(*client_id);
+					}
+				}
+			}
+
+			for client_id in need_persist.into_iter() {
+				debug_assert!(!need_remove.contains(&client_id));
+				self.persist_peer_state(client_id).await?;
+			}
+
+			for client_id in need_remove {
+				let mut future_opt = None;
+				{
+					// We need to take the `per_peer_state` write lock to remove an entry, but also
+					// have to hold it until after the `remove` call returns (but not through
+					// future completion) to ensure that writes for the peer's state are
+					// well-ordered with other `persist_peer_state` calls even across the removal
+					// itself.
+					let mut per_peer_state = self.per_peer_state.write().unwrap();
+					if let Entry::Occupied(mut entry) = per_peer_state.entry(client_id) {
+						let state = entry.get_mut();
+						if state.is_prunable() && !self.client_has_open_channel(&client_id) {
+							entry.remove();
+							let key = client_id.to_string();
+							future_opt = Some(self.kv_store.remove(
+								LIQUIDITY_MANAGER_PERSISTENCE_PRIMARY_NAMESPACE,
+								LSPS5_SERVICE_PERSISTENCE_SECONDARY_NAMESPACE,
+								&key,
+							));
+						} else {
+							// If the peer was re-added, force a re-persist of the current state.
+							state.needs_persist = true;
+						}
+					} else {
+						// This should never happen; we can only have one `persist` call
+						// in progress at once, and map entries are only removed by it.
+						debug_assert!(false);
+					}
+				}
+				if let Some(future) = future_opt {
+					future.await?;
+				} else {
+					self.persist_peer_state(client_id).await?;
+				}
+			}
+
+			if self.persistence_in_flight.fetch_sub(1, Ordering::AcqRel) != 1 {
+				// If another thread incremented the counter while we were running, we should go
+				// around again, but only once.
+				self.persistence_in_flight.store(1, Ordering::Release);
+				continue;
+			}
+			break;
 		}
 
 		Ok(())
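The `persistence_in_flight` counter added above implements a "run once, then go around again if anyone asked in the meantime" pattern: the first caller performs the work in a loop, later callers only bump the counter and return, and the worker does one extra pass if the counter moved while it was busy. A minimal standalone sketch of that pattern, with a hypothetical `Persister` type and a `run_persist_pass` placeholder standing in for the real scan-and-persist work:

// Hypothetical sketch of the in-flight counter pattern; names are made up.
use core::sync::atomic::{AtomicUsize, Ordering};

struct Persister {
	persistence_in_flight: AtomicUsize,
}

impl Persister {
	fn persist(&self) {
		if self.persistence_in_flight.fetch_add(1, Ordering::AcqRel) > 0 {
			// Someone else is already persisting; our increment tells them to
			// go around again, so we can return immediately.
			return;
		}
		loop {
			self.run_persist_pass();
			if self.persistence_in_flight.fetch_sub(1, Ordering::AcqRel) != 1 {
				// Another caller arrived while we were working: collapse all
				// outstanding requests into a single extra pass and loop.
				self.persistence_in_flight.store(1, Ordering::Release);
				continue;
			}
			break;
		}
	}

	fn run_persist_pass(&self) {
		// Stand-in for the real scan-and-persist work.
	}
}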
@@ -761,7 +809,7 @@ impl PeerState {
 		});
 	}
 
-	fn is_prunable(&mut self) -> bool {
+	fn is_prunable(&self) -> bool {
 		self.webhooks.is_empty()
 	}
 }
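The `&self` relaxation above is needed because the reworked `persist` loop now scans peers through a read lock (`outer_state_lock.iter()` only yields shared references). The removal step later retakes the write lock and re-checks prunability before deleting anything, so a peer that re-registered a webhook in the meantime is re-persisted instead of dropped. A simplified sketch of that re-check, using `u64` keys and plain `std` collections in place of the crate's own types:

// Simplified, illustrative sketch of the re-check-before-remove step: retake
// the write lock, confirm the entry is still prunable, and only then drop it;
// otherwise mark it for a fresh persist. Types and the bool return are
// assumptions for the sketch, not the handler's real API.
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::RwLock;

struct PeerState {
	webhooks: Vec<String>,
	needs_persist: bool,
}

impl PeerState {
	fn is_prunable(&self) -> bool {
		self.webhooks.is_empty()
	}
}

// Returns true if the entry was removed and the caller should also delete the
// persisted blob for this peer from the KVStore.
fn prune_if_still_empty(per_peer_state: &RwLock<HashMap<u64, PeerState>>, client_id: u64) -> bool {
	let mut map = per_peer_state.write().unwrap();
	match map.entry(client_id) {
		Entry::Occupied(mut entry) => {
			if entry.get().is_prunable() {
				entry.remove();
				true
			} else {
				// The peer registered a webhook again in the meantime; force a
				// re-persist of the current state instead of deleting it.
				entry.get_mut().needs_persist = true;
				false
			}
		},
		// The earlier scan saw this peer, so it should still be present here.
		Entry::Vacant(_) => false,
	}
}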