Skip to content

Commit 9257431

Browse files
Merge pull request #89 from tylertreat-wf/iterator
Ctrie Iterator
2 parents 183224c + 0f0497f commit 9257431

File tree

5 files changed

+222
-32
lines changed

5 files changed

+222
-32
lines changed

README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,28 @@ immutable version. Unfortunately, to make the B-tree generic we require an
141141
interface and the most expensive operation in CPU profiling is the interface
142142
method which in turn calls into runtime.assertI2T. We need generics.
143143

144+
#### Ctrie
145+
146+
A concurrent, lock-free hash array mapped trie with efficient non-blocking
147+
snapshots. For lookups, Ctries have comparable performance to concurrent skip
148+
lists and concurrent hashmaps. One key advantage of Ctries is they are
149+
dynamically allocated. Memory consumption is always proportional to the number
150+
of keys in the Ctrie, while hashmaps typically have to grow and shrink. Lookups,
151+
inserts, and removes are O(log n).
152+
153+
One interesting advantage Ctries have over traditional concurrent data
154+
structures is support for lock-free, linearizable, constant-time snapshots.
155+
Most concurrent data structures do not support snapshots, instead opting for
156+
locks or requiring a quiescent state. This allows Ctries to have O(1) iterator
157+
creation and clear operations. Size retrieval currently walks a snapshot of the Ctrie, which is O(n).
158+
159+
#### Persistent List
160+
161+
A persistent, immutable linked list. All write operations yield a new, updated
162+
structure which preserves and reuses previous versions. This uses a very
163+
functional, cons-style approach to list manipulation. Insert, get, remove, and size
164+
operations are O(n) as you would expect.
165+
144166
### Installation
145167

146168
1. Install Go 1.3 or higher.

list/persistent.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ type PersistentList interface {
6969
// FindIndex applies the predicate function to the list and returns the
7070
// index of the first item which matches or -1 if there is no match.
7171
FindIndex(func(interface{}) bool) int
72+
73+
// Map applies the function to each entry in the list and returns the
74+
// resulting slice.
75+
Map(func(interface{}) interface{}) []interface{}
7276
}
7377

7478
type emptyList struct{}
@@ -133,6 +137,12 @@ func (e *emptyList) FindIndex(func(interface{}) bool) int {
133137
return -1
134138
}
135139

140+
// Map applies the function to each entry in the list and returns the resulting
141+
// slice.
142+
func (e *emptyList) Map(func(interface{}) interface{}) []interface{} {
143+
return nil
144+
}
145+
136146
type list struct {
137147
head interface{}
138148
tail PersistentList
@@ -237,3 +247,9 @@ func (l *list) FindIndex(pred func(interface{}) bool) int {
237247
idx += 1
238248
}
239249
}
250+
251+
// Map applies the function to each entry in the list and returns the resulting
252+
// slice.
253+
func (l *list) Map(f func(interface{}) interface{}) []interface{} {
254+
return append(l.tail.Map(f), f(l.head))
255+
}

list/persistent_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,3 +276,14 @@ func TestLength(t *testing.T) {
276276
l = l.Add("bar").Add("baz")
277277
assert.Equal(uint(3), l.Length())
278278
}
279+
280+
func TestMap(t *testing.T) {
281+
assert := assert.New(t)
282+
f := func(x interface{}) interface{} {
283+
return x.(int) * x.(int)
284+
}
285+
assert.Nil(Empty.Map(f))
286+
287+
l := Empty.Add(1).Add(2).Add(3).Add(4)
288+
assert.Equal([]interface{}{1, 4, 9, 16}, l.Map(f))
289+
}

trie/ctrie/ctrie.go

Lines changed: 108 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ package ctrie
2525

2626
import (
2727
"bytes"
28+
"errors"
2829
"hash"
2930
"hash/fnv"
3031
"sync/atomic"
@@ -202,7 +203,7 @@ type tNode struct {
202203

203204
// untombed returns the S-node contained by the T-node.
204205
func (t *tNode) untombed() *sNode {
205-
return &sNode{&entry{key: t.key, hash: t.hash, value: t.value}}
206+
return &sNode{&Entry{Key: t.Key, hash: t.hash, Value: t.Value}}
206207
}
207208

208209
// lNode is a list node which is a leaf node used to handle hashcode
@@ -219,25 +220,25 @@ func (l *lNode) entry() *sNode {
219220

220221
// lookup returns the value at the given entry in the L-node or returns false
221222
// if it's not contained.
222-
func (l *lNode) lookup(e *entry) (interface{}, bool) {
223+
func (l *lNode) lookup(e *Entry) (interface{}, bool) {
223224
found, ok := l.Find(func(sn interface{}) bool {
224-
return bytes.Equal(e.key, sn.(*sNode).key)
225+
return bytes.Equal(e.Key, sn.(*sNode).Key)
225226
})
226227
if !ok {
227228
return nil, false
228229
}
229-
return found.(*sNode).value, true
230+
return found.(*sNode).Value, true
230231
}
231232

232233
// inserted creates a new L-node with the added entry.
233-
func (l *lNode) inserted(entry *entry) *lNode {
234+
func (l *lNode) inserted(entry *Entry) *lNode {
234235
return &lNode{l.Add(&sNode{entry})}
235236
}
236237

237238
// removed creates a new L-node with the entry removed.
238-
func (l *lNode) removed(e *entry) *lNode {
239+
func (l *lNode) removed(e *Entry) *lNode {
239240
idx := l.FindIndex(func(sn interface{}) bool {
240-
return bytes.Equal(e.key, sn.(*sNode).key)
241+
return bytes.Equal(e.Key, sn.(*sNode).Key)
241242
})
242243
if idx < 0 {
243244
return l
@@ -254,17 +255,16 @@ func (l *lNode) length() uint {
254255
// branch is either an iNode or sNode.
255256
type branch interface{}
256257

257-
// entry contains a Ctrie entry, which is also a technique used to cache the
258-
// hashcode of the key.
259-
type entry struct {
260-
key []byte
258+
// Entry contains a Ctrie key-value pair.
259+
type Entry struct {
260+
Key []byte
261+
Value interface{}
261262
hash uint32
262-
value interface{}
263263
}
264264

265265
// sNode is a singleton node which contains a single key and value.
266266
type sNode struct {
267-
*entry
267+
*Entry
268268
}
269269

270270
// New creates an empty Ctrie which uses the provided HashFactory for key
@@ -294,24 +294,24 @@ func newCtrie(root *iNode, hashFactory HashFactory, readOnly bool) *Ctrie {
294294
// the key already exists.
295295
func (c *Ctrie) Insert(key []byte, value interface{}) {
296296
c.assertReadWrite()
297-
c.insert(&entry{
298-
key: key,
297+
c.insert(&Entry{
298+
Key: key,
299+
Value: value,
299300
hash: c.hash(key),
300-
value: value,
301301
})
302302
}
303303

304304
// Lookup returns the value for the associated key or returns false if the key
305305
// doesn't exist.
306306
func (c *Ctrie) Lookup(key []byte) (interface{}, bool) {
307-
return c.lookup(&entry{key: key, hash: c.hash(key)})
307+
return c.lookup(&Entry{Key: key, hash: c.hash(key)})
308308
}
309309

310310
// Remove deletes the value for the associated key, returning the removed value
311311
// and true, or nil and false if the entry doesn't exist.
312312
func (c *Ctrie) Remove(key []byte) (interface{}, bool) {
313313
c.assertReadWrite()
314-
return c.remove(&entry{key: key, hash: c.hash(key)})
314+
return c.remove(&Entry{Key: key, hash: c.hash(key)})
315315
}
316316

317317
// Snapshot returns a stable, point-in-time snapshot of the Ctrie.
@@ -340,20 +340,96 @@ func (c *Ctrie) ReadOnlySnapshot() *Ctrie {
340340
}
341341
}
342342

343+
// Clear removes all keys from the Ctrie.
344+
func (c *Ctrie) Clear() {
345+
for {
346+
root := c.readRoot()
347+
gen := &generation{}
348+
newRoot := &iNode{
349+
main: &mainNode{cNode: &cNode{array: make([]branch, 0), gen: gen}},
350+
gen: gen,
351+
}
352+
if c.rdcssRoot(root, gcasRead(root, c), newRoot) {
353+
return
354+
}
355+
}
356+
}
357+
358+
// Iterator returns a channel which yields the Entries of the Ctrie. If a
359+
// cancel channel is provided, closing it will terminate and close the iterator
360+
// channel. Note that if a cancel channel is not used and not every entry is
361+
// read from the iterator, a goroutine will leak.
362+
func (c *Ctrie) Iterator(cancel <-chan struct{}) <-chan *Entry {
363+
ch := make(chan *Entry)
364+
snapshot := c.ReadOnlySnapshot()
365+
go func() {
366+
traverse(snapshot.root, ch, cancel)
367+
close(ch)
368+
}()
369+
return ch
370+
}
371+
372+
// Size returns the number of keys in the Ctrie.
373+
func (c *Ctrie) Size() uint {
374+
// TODO: The size operation can be optimized further by caching the size
375+
// information in main nodes of a read-only Ctrie – this reduces the
376+
// amortized complexity of the size operation to O(1) because the size
377+
// computation is amortized across the update operations that occurred
378+
// since the last snapshot.
379+
size := uint(0)
380+
for _ = range c.Iterator(nil) {
381+
size++
382+
}
383+
return size
384+
}
385+
386+
var errCanceled = errors.New("canceled")
387+
388+
func traverse(i *iNode, ch chan<- *Entry, cancel <-chan struct{}) error {
389+
switch {
390+
case i.main.cNode != nil:
391+
for _, br := range i.main.cNode.array {
392+
switch b := br.(type) {
393+
case *iNode:
394+
if err := traverse(b, ch, cancel); err != nil {
395+
return err
396+
}
397+
case *sNode:
398+
select {
399+
case ch <- b.Entry:
400+
case <-cancel:
401+
return errCanceled
402+
}
403+
}
404+
}
405+
case i.main.lNode != nil:
406+
for _, e := range i.main.lNode.Map(func(sn interface{}) interface{} {
407+
return sn.(*sNode).Entry
408+
}) {
409+
select {
410+
case ch <- e.(*Entry):
411+
case <-cancel:
412+
return errCanceled
413+
}
414+
}
415+
}
416+
return nil
417+
}
418+
343419
// assertReadWrite panics if the Ctrie is a read-only snapshot. Mutating
// operations call it before making any change.
func (c *Ctrie) assertReadWrite() {
	if !c.readOnly {
		return
	}
	panic("Cannot modify read-only snapshot")
}
348424

349-
func (c *Ctrie) insert(entry *entry) {
425+
func (c *Ctrie) insert(entry *Entry) {
350426
root := c.readRoot()
351427
if !c.iinsert(root, entry, 0, nil, root.gen) {
352428
c.insert(entry)
353429
}
354430
}
355431

356-
func (c *Ctrie) lookup(entry *entry) (interface{}, bool) {
432+
func (c *Ctrie) lookup(entry *Entry) (interface{}, bool) {
357433
root := c.readRoot()
358434
result, exists, ok := c.ilookup(root, entry, 0, nil, root.gen)
359435
for !ok {
@@ -362,7 +438,7 @@ func (c *Ctrie) lookup(entry *entry) (interface{}, bool) {
362438
return result, exists
363439
}
364440

365-
func (c *Ctrie) remove(entry *entry) (interface{}, bool) {
441+
func (c *Ctrie) remove(entry *Entry) (interface{}, bool) {
366442
root := c.readRoot()
367443
result, exists, ok := c.iremove(root, entry, 0, nil, root.gen)
368444
for !ok {
@@ -383,7 +459,7 @@ func (c *Ctrie) hash(k []byte) uint32 {
383459

384460
// iinsert attempts to insert the entry into the Ctrie. If false is returned,
385461
// the operation should be retried.
386-
func (c *Ctrie) iinsert(i *iNode, entry *entry, lev uint, parent *iNode, startGen *generation) bool {
462+
func (c *Ctrie) iinsert(i *iNode, entry *Entry, lev uint, parent *iNode, startGen *generation) bool {
387463
// Linearization point.
388464
main := gcasRead(i, c)
389465
switch {
@@ -417,7 +493,7 @@ func (c *Ctrie) iinsert(i *iNode, entry *entry, lev uint, parent *iNode, startGe
417493
return false
418494
case *sNode:
419495
sn := branch.(*sNode)
420-
if !bytes.Equal(sn.key, entry.key) {
496+
if !bytes.Equal(sn.Key, entry.Key) {
421497
// If the branch is an S-node and its key is not equal to the
422498
// key being inserted, then the Ctrie has to be extended with
423499
// an additional level. The C-node is replaced with its updated
@@ -457,7 +533,7 @@ func (c *Ctrie) iinsert(i *iNode, entry *entry, lev uint, parent *iNode, startGe
457533
// values are the entry value and whether or not the entry was contained in the
458534
// Ctrie. The last bool indicates if the operation succeeded. False means it
459535
// should be retried.
460-
func (c *Ctrie) ilookup(i *iNode, entry *entry, lev uint, parent *iNode, startGen *generation) (interface{}, bool, bool) {
536+
func (c *Ctrie) ilookup(i *iNode, entry *Entry, lev uint, parent *iNode, startGen *generation) (interface{}, bool, bool) {
461537
// Linearization point.
462538
main := gcasRead(i, c)
463539
switch {
@@ -490,8 +566,8 @@ func (c *Ctrie) ilookup(i *iNode, entry *entry, lev uint, parent *iNode, startGe
490566
// equal, the corresponding value from the S-node is
491567
// returned and a NOTFOUND value otherwise.
492568
sn := branch.(*sNode)
493-
if bytes.Equal(sn.key, entry.key) {
494-
return sn.value, true, true
569+
if bytes.Equal(sn.Key, entry.Key) {
570+
return sn.Value, true, true
495571
}
496572
return nil, false, true
497573
default:
@@ -513,7 +589,7 @@ func (c *Ctrie) ilookup(i *iNode, entry *entry, lev uint, parent *iNode, startGe
513589
// values are the entry value and whether or not the entry was contained in the
514590
// Ctrie. The last bool indicates if the operation succeeded. False means it
515591
// should be retried.
516-
func (c *Ctrie) iremove(i *iNode, entry *entry, lev uint, parent *iNode, startGen *generation) (interface{}, bool, bool) {
592+
func (c *Ctrie) iremove(i *iNode, entry *Entry, lev uint, parent *iNode, startGen *generation) (interface{}, bool, bool) {
517593
// Linearization point.
518594
main := gcasRead(i, c)
519595
switch {
@@ -543,7 +619,7 @@ func (c *Ctrie) iremove(i *iNode, entry *entry, lev uint, parent *iNode, startGe
543619
// If the branch is an S-node, its key is compared against the key
544620
// being removed.
545621
sn := branch.(*sNode)
546-
if !bytes.Equal(sn.key, entry.key) {
622+
if !bytes.Equal(sn.Key, entry.Key) {
547623
// If the keys are not equal, the NOTFOUND value is returned.
548624
return nil, false, true
549625
}
@@ -562,7 +638,7 @@ func (c *Ctrie) iremove(i *iNode, entry *entry, lev uint, parent *iNode, startGe
562638
cleanParent(parent, i, entry.hash, lev-w, c, startGen)
563639
}
564640
}
565-
return sn.value, true, true
641+
return sn.Value, true, true
566642
}
567643
return nil, false, false
568644
default:
@@ -642,13 +718,13 @@ func clean(i *iNode, lev uint, ctrie *Ctrie) bool {
642718
return true
643719
}
644720

645-
func cleanReadOnly(tn *tNode, lev uint, p *iNode, ctrie *Ctrie, entry *entry) (val interface{}, exists bool, ok bool) {
721+
func cleanReadOnly(tn *tNode, lev uint, p *iNode, ctrie *Ctrie, entry *Entry) (val interface{}, exists bool, ok bool) {
646722
if !ctrie.readOnly {
647723
clean(p, lev-5, ctrie)
648724
return nil, false, false
649725
}
650-
if tn.hash == entry.hash && bytes.Equal(tn.key, entry.key) {
651-
return tn.value, true, true
726+
if tn.hash == entry.hash && bytes.Equal(tn.Key, entry.Key) {
727+
return tn.Value, true, true
652728
}
653729
return nil, false, true
654730
}

0 commit comments

Comments
 (0)