Skip to content

Commit a430e78

Browse files
Merge pull request #107 from evanh/master
Add functions to allow storage of bitarrays
2 parents 0d929fe + 054d629 commit a430e78

File tree

3 files changed

+352
-0
lines changed

3 files changed

+352
-0
lines changed

bitarray/encoding.go

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
/*
2+
Copyright 2014 Workiva, LLC
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package bitarray
18+
19+
import (
20+
"bytes"
21+
"encoding/binary"
22+
"errors"
23+
"io"
24+
)
25+
26+
// Marshal takes a dense or sparse bit array and serializes it to a
27+
// byte slice.
28+
func Marshal(ba BitArray) ([]byte, error) {
29+
if eba, ok := ba.(*bitArray); ok {
30+
return eba.Serialize()
31+
} else if sba, ok := ba.(*sparseBitArray); ok {
32+
return sba.Serialize()
33+
} else {
34+
return nil, errors.New("not a valid BitArray")
35+
}
36+
}
37+
38+
// Unmarshal takes a byte slice, of the same format produced by Marshal,
39+
// and returns a BitArray.
40+
func Unmarshal(input []byte) (BitArray, error) {
41+
if len(input) == 0 {
42+
return nil, errors.New("no data in input")
43+
}
44+
if input[0] == 'B' {
45+
ret := newBitArray(0)
46+
err := ret.Deserialize(input)
47+
if err != nil {
48+
return nil, err
49+
}
50+
return ret, nil
51+
} else if input[0] == 'S' {
52+
ret := newSparseBitArray()
53+
err := ret.Deserialize(input)
54+
if err != nil {
55+
return nil, err
56+
}
57+
return ret, nil
58+
} else {
59+
return nil, errors.New("unrecognized encoding")
60+
}
61+
}
62+
63+
// Serialize converts the sparseBitArray to a byte slice
64+
func (ba *sparseBitArray) Serialize() ([]byte, error) {
65+
w := new(bytes.Buffer)
66+
67+
var identifier uint8 = 'S'
68+
err := binary.Write(w, binary.LittleEndian, identifier)
69+
if err != nil {
70+
return nil, err
71+
}
72+
73+
blocksLen := uint64(len(ba.blocks))
74+
indexLen := uint64(len(ba.indices))
75+
76+
err = binary.Write(w, binary.LittleEndian, blocksLen)
77+
if err != nil {
78+
return nil, err
79+
}
80+
81+
err = binary.Write(w, binary.LittleEndian, ba.blocks)
82+
if err != nil {
83+
return nil, err
84+
}
85+
86+
err = binary.Write(w, binary.LittleEndian, indexLen)
87+
if err != nil {
88+
return nil, err
89+
}
90+
91+
err = binary.Write(w, binary.LittleEndian, ba.indices)
92+
if err != nil {
93+
return nil, err
94+
}
95+
return w.Bytes(), nil
96+
}
97+
98+
// Deserialize takes the incoming byte slice, and populates the sparseBitArray
99+
// with data in the bytes. Note that this will overwrite any capacity
100+
// specified when creating the sparseBitArray. Also note that if an error
101+
// is returned, the sparseBitArray this is called on might be populated
102+
// with partial data.
103+
func (ret *sparseBitArray) Deserialize(incoming []byte) error {
104+
r := bytes.NewReader(incoming[1:]) // Discard identifier
105+
106+
var intsToRead uint64
107+
err := binary.Read(r, binary.LittleEndian, &intsToRead)
108+
if err != nil {
109+
return err
110+
}
111+
112+
var nextblock block
113+
for i := intsToRead; i > uint64(0); i-- {
114+
err = binary.Read(r, binary.LittleEndian, &nextblock)
115+
if err != nil {
116+
return err
117+
}
118+
ret.blocks = append(ret.blocks, nextblock)
119+
}
120+
121+
err = binary.Read(r, binary.LittleEndian, &intsToRead)
122+
if err != nil {
123+
return err
124+
}
125+
126+
var nextuint uint64
127+
for i := intsToRead; i > uint64(0); i-- {
128+
err = binary.Read(r, binary.LittleEndian, &nextuint)
129+
if err != nil {
130+
return err
131+
}
132+
ret.indices = append(ret.indices, nextuint)
133+
}
134+
135+
return nil
136+
}
137+
138+
// Serialize converts the bitArray to a byte slice.
139+
func (ba *bitArray) Serialize() ([]byte, error) {
140+
w := new(bytes.Buffer)
141+
142+
var identifier uint8 = 'B'
143+
err := binary.Write(w, binary.LittleEndian, identifier)
144+
if err != nil {
145+
return nil, err
146+
}
147+
148+
err = binary.Write(w, binary.LittleEndian, ba.lowest)
149+
if err != nil {
150+
return nil, err
151+
}
152+
err = binary.Write(w, binary.LittleEndian, ba.highest)
153+
if err != nil {
154+
return nil, err
155+
}
156+
157+
var encodedanyset uint8
158+
if ba.anyset {
159+
encodedanyset = 1
160+
} else {
161+
encodedanyset = 0
162+
}
163+
err = binary.Write(w, binary.LittleEndian, encodedanyset)
164+
if err != nil {
165+
return nil, err
166+
}
167+
168+
err = binary.Write(w, binary.LittleEndian, ba.blocks)
169+
if err != nil {
170+
return nil, err
171+
}
172+
return w.Bytes(), nil
173+
}
174+
175+
// Deserialize takes the incoming byte slice, and populates the bitArray
176+
// with data in the bytes. Note that this will overwrite any capacity
177+
// specified when creating the bitArray. Also note that if an error is returned,
178+
// the bitArray this is called on might be populated with partial data.
179+
func (ret *bitArray) Deserialize(incoming []byte) error {
180+
r := bytes.NewReader(incoming[1:]) // Discard identifier
181+
182+
err := binary.Read(r, binary.LittleEndian, &ret.lowest)
183+
if err != nil {
184+
return err
185+
}
186+
187+
err = binary.Read(r, binary.LittleEndian, &ret.highest)
188+
if err != nil {
189+
return err
190+
}
191+
192+
var encodedanyset uint8
193+
err = binary.Read(r, binary.LittleEndian, &encodedanyset)
194+
if err != nil {
195+
return err
196+
}
197+
198+
// anyset defaults to false so we don't need an else statement
199+
if encodedanyset == 1 {
200+
ret.anyset = true
201+
}
202+
203+
var nextblock block
204+
err = binary.Read(r, binary.LittleEndian, &nextblock)
205+
for err == nil {
206+
ret.blocks = append(ret.blocks, nextblock)
207+
err = binary.Read(r, binary.LittleEndian, &nextblock)
208+
}
209+
if err != io.EOF {
210+
return err
211+
}
212+
return nil
213+
}

bitarray/encoding_test.go

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
/*
2+
Copyright 2014 Workiva, LLC
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package bitarray
18+
19+
import (
20+
"testing"
21+
22+
"github.com/stretchr/testify/assert"
23+
)
24+
25+
func TestSparseBitArraySerialization(t *testing.T) {
26+
numItems := uint64(1280)
27+
input := newSparseBitArray()
28+
29+
for i := uint64(0); i < numItems; i++ {
30+
if i%3 == 0 {
31+
input.SetBit(i)
32+
}
33+
}
34+
35+
outBytes, err := input.Serialize()
36+
assert.Equal(t, err, nil)
37+
38+
assert.Equal(t, len(outBytes), 337)
39+
assert.True(t, outBytes[0] == 'S')
40+
expected := []byte{83, 20, 0, 0, 0, 0, 0, 0, 0, 73}
41+
assert.Equal(t, expected, outBytes[:10])
42+
43+
output := newSparseBitArray()
44+
err = output.Deserialize(outBytes)
45+
assert.Equal(t, err, nil)
46+
assert.True(t, input.Equals(output))
47+
}
48+
49+
func TestBitArraySerialization(t *testing.T) {
50+
numItems := uint64(1280)
51+
input := newBitArray(numItems)
52+
53+
for i := uint64(0); i < numItems; i++ {
54+
if i%3 == 0 {
55+
input.SetBit(i)
56+
}
57+
}
58+
59+
outBytes, err := input.Serialize()
60+
assert.Equal(t, err, nil)
61+
62+
// 1280 bits = 20 blocks = 160 bytes, plus lowest and highest at
63+
// 128 bits = 16 bytes plus 1 byte for the anyset param and the identifer
64+
assert.Equal(t, len(outBytes), 178)
65+
66+
expected := []byte{66, 0, 0, 0, 0, 0, 0, 0, 0, 254}
67+
assert.Equal(t, expected, outBytes[:10])
68+
69+
output := newBitArray(0)
70+
err = output.Deserialize(outBytes)
71+
assert.Equal(t, err, nil)
72+
assert.True(t, input.Equals(output))
73+
}
74+
75+
func TestBitArrayMarshalUnmarshal(t *testing.T) {
76+
numItems := uint64(1280)
77+
input := newBitArray(numItems)
78+
79+
for i := uint64(0); i < numItems; i++ {
80+
if i%3 == 0 {
81+
input.SetBit(i)
82+
}
83+
}
84+
85+
outputBytes, err := Marshal(input)
86+
assert.Equal(t, err, nil)
87+
assert.Equal(t, outputBytes[0], byte('B'))
88+
assert.Equal(t, len(outputBytes), 178)
89+
90+
output, err := Unmarshal(outputBytes)
91+
assert.Equal(t, err, nil)
92+
93+
assert.True(t, input.Equals(output))
94+
}
95+
96+
func TestSparseBitArrayMarshalUnmarshal(t *testing.T) {
97+
numItems := uint64(1280)
98+
input := newSparseBitArray()
99+
100+
for i := uint64(0); i < numItems; i++ {
101+
if i%3 == 0 {
102+
input.SetBit(i)
103+
}
104+
}
105+
106+
outputBytes, err := Marshal(input)
107+
assert.Equal(t, err, nil)
108+
assert.Equal(t, outputBytes[0], byte('S'))
109+
assert.Equal(t, len(outputBytes), 337)
110+
111+
output, err := Unmarshal(outputBytes)
112+
assert.Equal(t, err, nil)
113+
114+
assert.True(t, input.Equals(output))
115+
}
116+
117+
func TestUnmarshalErrors(t *testing.T) {
118+
numItems := uint64(1280)
119+
input := newBitArray(numItems)
120+
121+
for i := uint64(0); i < numItems; i++ {
122+
if i%3 == 0 {
123+
input.SetBit(i)
124+
}
125+
}
126+
127+
outputBytes, err := Marshal(input)
128+
129+
outputBytes[0] = 'C'
130+
131+
output, err := Unmarshal(outputBytes)
132+
assert.Error(t, err)
133+
assert.Equal(t, output, nil)
134+
135+
output, err = Unmarshal(nil)
136+
assert.Error(t, err)
137+
assert.Equal(t, output, nil)
138+
}

bitarray/sparse_bitarray.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ type sparseBitArray struct {
108108
func (sba *sparseBitArray) SetBit(k uint64) error {
109109
index, position := getIndexAndRemainder(k)
110110
i, inserted := sba.indices.insert(index)
111+
111112
if inserted {
112113
sba.blocks.insert(i)
113114
}

0 commit comments

Comments
 (0)