diff --git a/config.go b/config.go index e9ce4103542..5bbd5a4fef7 100644 --- a/config.go +++ b/config.go @@ -30,6 +30,7 @@ import ( "github.com/lightningnetwork/lnd/channeldb" "github.com/lightningnetwork/lnd/discovery" "github.com/lightningnetwork/lnd/funding" + graphdb "github.com/lightningnetwork/lnd/graph/db" "github.com/lightningnetwork/lnd/htlcswitch" "github.com/lightningnetwork/lnd/htlcswitch/hodl" "github.com/lightningnetwork/lnd/input" @@ -675,8 +676,9 @@ func DefaultConfig() Config { Sig: lncfg.DefaultSigWorkers, }, Caches: &lncfg.Caches{ - RejectCacheSize: channeldb.DefaultRejectCacheSize, - ChannelCacheSize: channeldb.DefaultChannelCacheSize, + RejectCacheSize: channeldb.DefaultRejectCacheSize, + ChannelCacheSize: channeldb.DefaultChannelCacheSize, + PublicNodeCacheSize: graphdb.DefaultPublicNodeCacheSize, }, Prometheus: lncfg.DefaultPrometheus(), Watchtower: lncfg.DefaultWatchtowerCfg(defaultTowerDir), diff --git a/config_builder.go b/config_builder.go index 7ce63041ee2..bab756ab3f6 100644 --- a/config_builder.go +++ b/config_builder.go @@ -1045,6 +1045,7 @@ func (d *DefaultDatabaseBuilder) BuildDatabase( graphDBOptions := []graphdb.StoreOptionModifier{ graphdb.WithRejectCacheSize(cfg.Caches.RejectCacheSize), graphdb.WithChannelCacheSize(cfg.Caches.ChannelCacheSize), + graphdb.WithPublicNodeCacheSize(cfg.Caches.PublicNodeCacheSize), graphdb.WithBatchCommitInterval(cfg.DB.BatchCommitInterval), } diff --git a/docs/release-notes/release-notes-0.20.1.md b/docs/release-notes/release-notes-0.20.1.md index 344894073f1..5d2f6a4b5b9 100644 --- a/docs/release-notes/release-notes-0.20.1.md +++ b/docs/release-notes/release-notes-0.20.1.md @@ -92,6 +92,12 @@ safe single-writer behavior until the wallet subsystem is fully concurrent-safe. +* [Add caching for](https://github.com/lightningnetwork/lnd/pull/10363) +`IsPublicNode` queries, which speeds up checks of a node's visibility status.
+This reduces the amount of time lnd needs to query the db to determine if a node +is public or not. Also added a new config `caches.public-node-cache-size` which +controls the max number of entries that the cache can accommodate. + ## Deprecations # Technical and Architectural Updates @@ -107,5 +113,6 @@ # Contributors (Alphabetical Order) +* Abdulkbk * bitromortac * Ziggie diff --git a/graph/db/graph_sql_test.go b/graph/db/graph_sql_test.go new file mode 100644 index 00000000000..1825bdad45c --- /dev/null +++ b/graph/db/graph_sql_test.go @@ -0,0 +1,75 @@ +//go:build test_db_postgres || test_db_sqlite + +package graphdb + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +// TestNodeIsPublicCacheInvalidation ensures that we correctly invalidate the +// cache we use when determining if a node is public or not. +func TestNodeIsPublicCacheInvalidation(t *testing.T) { + t.Parallel() + ctx := t.Context() + + graph := MakeTestGraph(t) + + node1 := createTestVertex(t) + node2 := createTestVertex(t) + + require.NoError(t, graph.AddNode(ctx, node1)) + require.NoError(t, graph.AddNode(ctx, node2)) + + edge, _ := createEdge(10, 0, 0, 0, node1, node2) + require.NoError(t, graph.AddChannelEdge(ctx, &edge)) + + // First IsPublic call should populate cache. + isPublic1, err := graph.IsPublicNode(node1.PubKeyBytes) + require.NoError(t, err) + require.True(t, isPublic1) + + // Test invalidation scenarios: + + // 1. DeleteChannelEdges: + // Above, the node being public should be cached, but we expect that + // DeleteChannelEdges will invalidate the cache for both nodes else when + // we call IsPublic, we will hit the cache. + err = graph.DeleteChannelEdges(false, true, edge.ChannelID) + require.NoError(t, err) + isPublic1, err = graph.IsPublicNode(node1.PubKeyBytes) + require.NoError(t, err) + require.False(t, isPublic1) + + isPublic2, err := graph.IsPublicNode(node2.PubKeyBytes) + require.NoError(t, err) + require.False(t, isPublic2) + + // 2. 
AddChannelEdge: + // Now we know that the last `IsPublicNode` call above will cache our + // nodes with `isPublic` = false. But adding a new channel edge should + // invalidate the cache such that when we call `IsPublic` it should + // return `True`. + edge2, _ := createEdge(10, 1, 0, 1, node1, node2) + require.NoError(t, graph.AddChannelEdge(ctx, &edge2)) + isPublic1, err = graph.IsPublicNode(node1.PubKeyBytes) + require.NoError(t, err) + require.True(t, isPublic1) + + isPublic2, err = graph.IsPublicNode(node2.PubKeyBytes) + require.NoError(t, err) + require.True(t, isPublic2) + + // 3. DeleteNode: + // Again, the last two sets of `IsPublic` should have cached our nodes + // as `True`. Now we can delete a node and expect the next call to be + // False. + // + // NOTE: We don't get an error calling `IsPublicNode` because of how the + // SQL query is implemented to check for the existence of public nodes. + require.NoError(t, graph.DeleteNode(ctx, node1.PubKeyBytes)) + isPublic1, err = graph.IsPublicNode(node1.PubKeyBytes) + require.NoError(t, err) + require.False(t, isPublic1) +} diff --git a/graph/db/graph_test.go b/graph/db/graph_test.go index 590c077d608..7b57415591c 100644 --- a/graph/db/graph_test.go +++ b/graph/db/graph_test.go @@ -1640,6 +1640,8 @@ func TestGraphCacheTraversal(t *testing.T) { require.Equal(t, numChannels*2*(numNodes-1), numNodeChans) } +// fillTestGraph fills the graph with a given number of nodes and creates a +// given number of channels between each node. func fillTestGraph(t testing.TB, graph *ChannelGraph, numNodes, numChannels int) (map[uint64]struct{}, []*models.Node) { @@ -4042,6 +4044,29 @@ func TestNodeIsPublic(t *testing.T) { ) } +// BenchmarkIsPublicNode measures the performance of IsPublicNode when +// repeatedly checking a single node in a large graph. +func BenchmarkIsPublicNode(b *testing.B) { + graph := MakeTestGraph(b) + + // Create a graph with a reasonable number of nodes and channels. 
+ numNodes := 8000 + numChans := 4 + _, nodes := fillTestGraph(b, graph, numNodes, numChans) + + // Pick any node to test with. + nodePub := nodes[len(nodes)/2].PubKeyBytes + + // Reset the timer to exclude setup time especially since + // `fillTestGraph` can take a while. + b.ResetTimer() + + for b.Loop() { + _, err := graph.IsPublicNode(nodePub) + require.NoError(b, err) + } +} + // TestDisabledChannelIDs ensures that the disabled channels within the // disabledEdgePolicyBucket are managed properly and the list returned from // DisabledChannelIDs is correct. diff --git a/graph/db/options.go b/graph/db/options.go index 15ea6f4ee85..ae818b6ae74 100644 --- a/graph/db/options.go +++ b/graph/db/options.go @@ -13,6 +13,11 @@ const ( // around 40MB. DefaultChannelCacheSize = 20000 + // DefaultPublicNodeCacheSize is the default number of node public + // status entries to cache. With 15k entries, this produces a cache of + // around 1-1.5MB (including map overhead and LRU bookkeeping). + DefaultPublicNodeCacheSize = 15000 + // DefaultPreAllocCacheNumNodes is the default number of channels we // assume for mainnet for pre-allocating the graph cache. As of // September 2021, there currently are 14k nodes in a strictly pruned @@ -125,6 +130,10 @@ type StoreOptions struct { // channel cache. ChannelCacheSize int + // PublicNodeCacheSize is the maximum number of node public status + // entries to hold in the cache. + PublicNodeCacheSize int + // BatchCommitInterval is the maximum duration the batch schedulers will // wait before attempting to commit a pending set of updates. BatchCommitInterval time.Duration @@ -138,9 +147,10 @@ type StoreOptions struct { // DefaultOptions returns a StoreOptions populated with default values. 
func DefaultOptions() *StoreOptions { return &StoreOptions{ - RejectCacheSize: DefaultRejectCacheSize, - ChannelCacheSize: DefaultChannelCacheSize, - NoMigration: false, + RejectCacheSize: DefaultRejectCacheSize, + ChannelCacheSize: DefaultChannelCacheSize, + PublicNodeCacheSize: DefaultPublicNodeCacheSize, + NoMigration: false, } } @@ -169,3 +179,10 @@ func WithBatchCommitInterval(interval time.Duration) StoreOptionModifier { o.BatchCommitInterval = interval } } + +// WithPublicNodeCacheSize sets the PublicNodeCacheSize to n. +func WithPublicNodeCacheSize(n int) StoreOptionModifier { + return func(o *StoreOptions) { + o.PublicNodeCacheSize = n + } +} diff --git a/graph/db/sql_store.go b/graph/db/sql_store.go index 3e2d74dc248..d83e2f43912 100644 --- a/graph/db/sql_store.go +++ b/graph/db/sql_store.go @@ -21,6 +21,8 @@ import ( "github.com/btcsuite/btcd/btcutil" "github.com/btcsuite/btcd/chaincfg/chainhash" "github.com/btcsuite/btcd/wire" + "github.com/lightninglabs/neutrino/cache" + "github.com/lightninglabs/neutrino/cache/lru" "github.com/lightningnetwork/lnd/aliasmgr" "github.com/lightningnetwork/lnd/batch" "github.com/lightningnetwork/lnd/fn/v2" @@ -176,6 +178,8 @@ type SQLStore struct { rejectCache *rejectCache chanCache *channelCache + publicNodeCache *lru.Cache[[33]byte, *cachedPublicNode] + chanScheduler batch.Scheduler[SQLQueries] nodeScheduler batch.Scheduler[SQLQueries] @@ -183,6 +187,18 @@ type SQLStore struct { srcNodeMu sync.Mutex } +// cachedPublicNode is a simple wrapper for a boolean value that can be +// stored in an LRU cache. The LRU cache requires a Size() method. +type cachedPublicNode struct { + isPublic bool +} + +// Size returns the size of the cache entry. We return 1 as we just want to +// limit the number of entries rather than their actual memory size. +func (c *cachedPublicNode) Size() (uint64, error) { + return 1, nil +} + // A compile-time assertion to ensure that SQLStore implements the V1Store // interface. 
var _ V1Store = (*SQLStore)(nil) @@ -217,7 +233,10 @@ func NewSQLStore(cfg *SQLStoreConfig, db BatchedSQLQueries, db: db, rejectCache: newRejectCache(opts.RejectCacheSize), chanCache: newChannelCache(opts.ChannelCacheSize), - srcNodes: make(map[lnwire.GossipVersion]*srcNodeInfo), + publicNodeCache: lru.NewCache[[33]byte, *cachedPublicNode]( + uint64(opts.PublicNodeCacheSize), + ), + srcNodes: make(map[lnwire.GossipVersion]*srcNodeInfo), } s.chanScheduler = batch.NewTimeScheduler( @@ -404,6 +423,8 @@ func (s *SQLStore) DeleteNode(ctx context.Context, return fmt.Errorf("unable to delete node: %w", err) } + s.removePublicNodeCache(pubKey) + return nil } @@ -714,6 +735,10 @@ func (s *SQLStore) AddChannelEdge(ctx context.Context, default: s.rejectCache.remove(edge.ChannelID) s.chanCache.remove(edge.ChannelID) + s.removePublicNodeCache( + edge.NodeKey1Bytes, edge.NodeKey2Bytes, + ) + return nil } }, @@ -1730,6 +1755,7 @@ func (s *SQLStore) MarkEdgeZombie(chanID uint64, s.rejectCache.remove(chanID) s.chanCache.remove(chanID) + s.removePublicNodeCache(pubKey1, pubKey2) return nil } @@ -1957,6 +1983,14 @@ func (s *SQLStore) DeleteChannelEdges(strictZombiePruning, markZombie bool, s.chanCache.remove(chanID) } + var pubkeys [][33]byte + for _, edge := range edges { + pubkeys = append( + pubkeys, edge.NodeKey1Bytes, edge.NodeKey2Bytes, + ) + } + s.removePublicNodeCache(pubkeys...) + return edges, nil } @@ -2292,8 +2326,19 @@ func (s *SQLStore) ChannelID(chanPoint *wire.OutPoint) (uint64, error) { func (s *SQLStore) IsPublicNode(pubKey [33]byte) (bool, error) { ctx := context.TODO() + // Check the cache first and return early if there is a hit. + cached, err := s.publicNodeCache.Get(pubKey) + if err == nil && cached != nil { + return cached.isPublic, nil + } + + // Log any error other than NotFound. 
+ if err != nil && !errors.Is(err, cache.ErrElementNotFound) { + log.Warnf("Unable to check cache if node is public: %v", err) + } + var isPublic bool - err := s.db.ExecTx(ctx, sqldb.ReadTxOpt(), func(db SQLQueries) error { + err = s.db.ExecTx(ctx, sqldb.ReadTxOpt(), func(db SQLQueries) error { var err error isPublic, err = db.IsPublicV1Node(ctx, pubKey[:]) @@ -2304,6 +2349,14 @@ func (s *SQLStore) IsPublicNode(pubKey [33]byte) (bool, error) { "public: %w", err) } + // Store the result in cache. + _, err = s.publicNodeCache.Put(pubKey, &cachedPublicNode{ + isPublic: isPublic, + }) + if err != nil { + log.Warnf("Unable to store node info in cache: %v", err) + } + return isPublic, nil } @@ -2655,6 +2708,9 @@ func (s *SQLStore) PruneGraph(spentOutputs []*wire.OutPoint, for _, channel := range closedChans { s.rejectCache.remove(channel.ChannelID) s.chanCache.remove(channel.ChannelID) + s.removePublicNodeCache( + channel.NodeKey1Bytes, channel.NodeKey2Bytes, + ) } return closedChans, prunedNodes, nil @@ -2923,6 +2979,9 @@ func (s *SQLStore) DisconnectBlockAtHeight(height uint32) ( for _, channel := range removedChans { s.rejectCache.remove(channel.ChannelID) s.chanCache.remove(channel.ChannelID) + s.removePublicNodeCache( + channel.NodeKey1Bytes, channel.NodeKey2Bytes, + ) } s.cacheMu.Unlock() @@ -5869,3 +5928,14 @@ func handleZombieMarking(ctx context.Context, db SQLQueries, }, ) } + +// removePublicNodeCache takes in a list of public keys and removes the +// corresponding nodes info from the cache if it exists. +// +// NOTE: This can safely be called without holding a lock since the lru is +// thread safe. +func (s *SQLStore) removePublicNodeCache(pubkeys ...[33]byte) { + for _, pubkey := range pubkeys { + s.publicNodeCache.Delete(pubkey) + } +} diff --git a/lncfg/caches.go b/lncfg/caches.go index 2457bb1d04a..e3e156ff876 100644 --- a/lncfg/caches.go +++ b/lncfg/caches.go @@ -14,6 +14,10 @@ const ( // channeldb's channel cache. This amounts to roughly 2 MB when full. 
MinChannelCacheSize = 1000 + // MinPublicNodeCacheSize is a floor on the maximum capacity allowed for + // the public node cache. This amounts to roughly 500 KB when full. + MinPublicNodeCacheSize = 5000 + // DefaultRPCGraphCacheDuration is the default interval that the RPC // response to DescribeGraph should be cached for. DefaultRPCGraphCacheDuration = time.Minute @@ -37,6 +41,11 @@ type Caches struct { // RPCGraphCacheDuration is used to control the flush interval of the // channel graph cache. RPCGraphCacheDuration time.Duration `long:"rpc-graph-cache-duration" description:"The period of time expressed as a duration (1s, 1m, 1h, etc) that the RPC response to DescribeGraph should be cached for."` + + // PublicNodeCacheSize is the maximum number of entries stored in lnd's + // public node cache, which is used to speed up checks for nodes + // visibility. Memory usage is roughly 100 bytes per entry. + PublicNodeCacheSize int `long:"public-node-cache-size" description:"Maximum number of entries contained in the public node cache, which is used to speed up checks for nodes visibility. Each entry requires roughly 100 bytes."` } // Validate checks the Caches configuration for values that are too small to be @@ -50,6 +59,11 @@ func (c *Caches) Validate() error { return fmt.Errorf("channel cache size %d is less than min: %d", c.ChannelCacheSize, MinChannelCacheSize) } + if c.PublicNodeCacheSize < MinPublicNodeCacheSize { + return fmt.Errorf("public node cache size %d is less than "+ + "min: %d", c.PublicNodeCacheSize, + MinPublicNodeCacheSize) + } return nil } diff --git a/sample-lnd.conf b/sample-lnd.conf index c9a2865b858..1566a7281b2 100644 --- a/sample-lnd.conf +++ b/sample-lnd.conf @@ -1388,6 +1388,10 @@ ; roughly 2Kb. ; caches.channel-cache-size=20000 +; Maximum number of entries contained in the public node cache, which is used to +; speed up checks for nodes visibility. Each entry requires roughly 100 bytes. 
+; caches.public-node-cache-size=15000 + ; The duration that the response to DescribeGraph should be cached for. Setting ; the value to zero disables the cache. ; Default: