Skip to content

Commit a6c43b3

Browse files
committed
add QueryBinary, an alloc-free way to read all rows into a buffer
WIP; goal is alloc-free reads of a query into a Go-provided buffer. Go code can then parse the simple binary format and alloc if needed (doing its own cache lookups, including alloc-free m[string([]byte)] lookups, and returning existing Views if data is unmodified).

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
1 parent 35b6e8e commit a6c43b3

File tree

7 files changed

+359
-2
lines changed

7 files changed

+359
-2
lines changed

binary.go

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package sqlite
6+
7+
import (
8+
"context"
9+
"encoding/binary"
10+
"fmt"
11+
"math"
12+
13+
"github.com/tailscale/sqlite/sqliteh"
14+
"golang.org/x/sys/cpu"
15+
)
16+
17+
func QueryBinary(ctx context.Context, sqlconn SQLConn, optScratch []byte, query string, args ...any) (BinaryResults, error) {
18+
var ret BinaryResults
19+
err := sqlconn.Raw(func(driverConn any) error {
20+
c, ok := driverConn.(*conn)
21+
if !ok {
22+
return fmt.Errorf("sqlite.TxnState: sql.Conn is not the sqlite driver: %T", driverConn)
23+
}
24+
st, err := c.prepare(ctx, query, IsPersist(ctx))
25+
if err != nil {
26+
return err
27+
}
28+
buf := optScratch
29+
if len(buf) == 0 {
30+
buf = make([]byte, 128)
31+
}
32+
for {
33+
st.stmt.ResetAndClear()
34+
// TODO(bradfitz): bind args
35+
n, err := st.stmt.StepAllBinary(buf)
36+
if err == nil {
37+
ret = BinaryResults(buf[:n])
38+
return nil
39+
}
40+
if e, ok := err.(sqliteh.BufferSizeTooSmallError); ok {
41+
buf = make([]byte, e.EncodedSize)
42+
continue
43+
}
44+
return err
45+
}
46+
})
47+
if err != nil {
48+
return nil, err
49+
}
50+
return ret, nil
51+
}
52+
53+
// BinaryResults is the result of QueryBinary.
54+
//
55+
// You should not depend on its specific format and parse it via its methods
56+
// instead.
//
// It may share memory with the scratch buffer that was passed to QueryBinary.
// Next consumes the slice as it decodes, so iterate over a copy of the slice
// header if the original value is still needed afterwards.
57+
type BinaryResults []byte
58+
59+
// BinaryToken is one decoded element of a BinaryResults stream, as returned
// by BinaryResults.Next. Every token produced by Next has exactly one of its
// boolean fields set.
type BinaryToken struct {
	StartRow, EndRow, EndRows bool // row and result-set framing markers

	IsInt   bool // if so, use Int() method
	IsFloat bool // if so, use Float() method
	IsNull  bool
	IsBytes bool // if so, the payload is in Bytes
	Error   bool // the input was empty, truncated, or had an unknown tag

	x     uint64 // raw 64-bit payload behind Int() and Float()
	Bytes []byte // a sub-slice of the BinaryResults buffer, not a copy
}
72+
73+
func (t *BinaryToken) String() string {
74+
switch {
75+
case t.StartRow:
76+
return "start-row"
77+
case t.EndRow:
78+
return "end-row"
79+
case t.EndRows:
80+
return "end-rows"
81+
case t.IsNull:
82+
return "null"
83+
case t.IsInt:
84+
return fmt.Sprintf("int: %v", t.Int())
85+
case t.IsFloat:
86+
return fmt.Sprintf("float: %g", t.Float())
87+
case t.IsBytes:
88+
return fmt.Sprintf("bytes: %q", t.Bytes)
89+
case t.Error:
90+
return "error"
91+
default:
92+
return "unknown"
93+
}
94+
}
95+
96+
// Int returns the token's 64-bit payload as a signed integer. The payload is
// only populated by Next for tokens with IsInt or IsFloat set.
func (t *BinaryToken) Int() int64 {
	return int64(t.x)
}
97+
// Float returns the token's 64-bit payload reinterpreted as the bit pattern
// of a float64. The payload is only populated by Next for tokens with IsInt
// or IsFloat set.
func (t *BinaryToken) Float() float64 {
	return math.Float64frombits(t.x)
}
98+
99+
func (r *BinaryResults) Next() BinaryToken {
100+
if len(*r) == 0 {
101+
return BinaryToken{Error: true}
102+
}
103+
first := (*r)[0]
104+
*r = (*r)[1:]
105+
switch first {
106+
default:
107+
return BinaryToken{Error: true}
108+
case '(':
109+
return BinaryToken{StartRow: true}
110+
case ')':
111+
return BinaryToken{EndRow: true}
112+
case 'E':
113+
return BinaryToken{EndRows: true}
114+
case 'n':
115+
return BinaryToken{IsNull: true}
116+
case 'i', 'f':
117+
if len(*r) < 8 {
118+
return BinaryToken{Error: true}
119+
}
120+
t := BinaryToken{IsInt: first == 'i', IsFloat: first == 'f'}
121+
if cpu.IsBigEndian {
122+
t.x = binary.BigEndian.Uint64((*r)[:8])
123+
} else {
124+
t.x = binary.LittleEndian.Uint64((*r)[:8])
125+
}
126+
*r = (*r)[8:]
127+
return t
128+
case 'b':
129+
if len(*r) < 8 {
130+
return BinaryToken{Error: true}
131+
}
132+
t := BinaryToken{IsBytes: true}
133+
var n int64
134+
if cpu.IsBigEndian {
135+
n = int64(binary.BigEndian.Uint64((*r)[:8]))
136+
} else {
137+
n = int64(binary.LittleEndian.Uint64((*r)[:8]))
138+
}
139+
*r = (*r)[8:]
140+
if int64(len(*r)) < n {
141+
return BinaryToken{Error: true}
142+
}
143+
t.Bytes = (*r)[:n]
144+
*r = (*r)[n:]
145+
return t
146+
}
147+
}

binary_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Copyright (c) 2023 Tailscale Inc & AUTHORS All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package sqlite
6+
7+
import (
8+
"context"
9+
"math"
10+
"reflect"
11+
"testing"
12+
13+
"github.com/google/go-cmp/cmp"
14+
)
15+
16+
func TestQueryBinary(t *testing.T) {
17+
ctx := WithPersist(context.Background())
18+
db := openTestDB(t)
19+
exec(t, db, "CREATE TABLE t (id INTEGER PRIMARY KEY, f REAL, txt TEXT, blb BLOB)")
20+
exec(t, db, "INSERT INTO t VALUES (?, ?, ?, ?)", math.MinInt64, 1.0, "text-a", "blob-a")
21+
exec(t, db, "INSERT INTO t VALUES (?, ?, ?, ?)", -1, -1.0, "text-b", "blob-b")
22+
exec(t, db, "INSERT INTO t VALUES (?, ?, ?, ?)", 0, 0, "text-c", "blob-c")
23+
exec(t, db, "INSERT INTO t VALUES (?, ?, ?, ?)", 20, 2, "text-d", "blob-d")
24+
exec(t, db, "INSERT INTO t VALUES (?, ?, ?, ?)", math.MaxInt64, nil, "text-e", "blob-e")
25+
exec(t, db, "INSERT INTO t VALUES (?, ?, ?, ?)", 42, 0.25, "text-f", nil)
26+
exec(t, db, "INSERT INTO t VALUES (?, ?, ?, ?)", 43, 1.75, "text-g", nil)
27+
28+
conn, err := db.Conn(ctx)
29+
if err != nil {
30+
t.Fatal(err)
31+
}
32+
33+
buf, err := QueryBinary(ctx, conn, make([]byte, 100), "SELECT * FROM t ORDER BY id")
34+
if err != nil {
35+
t.Fatal(err)
36+
}
37+
t.Logf("Got %d bytes: %q", len(buf), buf)
38+
39+
var got []string
40+
iter := buf
41+
for len(iter) > 0 {
42+
t := iter.Next()
43+
got = append(got, t.String())
44+
if t.Error {
45+
break
46+
}
47+
}
48+
want := []string{
49+
"start-row", "int: -9223372036854775808", "float: 1", "bytes: \"text-a\"", "bytes: \"blob-a\"", "end-row",
50+
"start-row", "int: -1", "float: -1", "bytes: \"text-b\"", "bytes: \"blob-b\"", "end-row",
51+
"start-row", "int: 0", "float: 0", "bytes: \"text-c\"", "bytes: \"blob-c\"", "end-row",
52+
"start-row", "int: 20", "float: 2", "bytes: \"text-d\"", "bytes: \"blob-d\"", "end-row",
53+
"start-row", "int: 42", "float: 0.25", "bytes: \"text-f\"", "null", "end-row",
54+
"start-row", "int: 43", "float: 1.75", "bytes: \"text-g\"", "null", "end-row",
55+
"start-row", "int: 9223372036854775807", "null", "bytes: \"text-e\"", "bytes: \"blob-e\"", "end-row",
56+
"end-rows",
57+
}
58+
if !reflect.DeepEqual(got, want) {
59+
t.Errorf("wrong results\n got: %q\nwant: %q\n\ndiff:\n%s", got, want, cmp.Diff(want, got))
60+
}
61+
62+
allocs := int(testing.AllocsPerRun(10000, func() {
63+
_, err := QueryBinary(ctx, conn, buf, "SELECT * FROM t")
64+
if err != nil {
65+
t.Fatal(err)
66+
}
67+
}))
68+
const maxAllocs = 5 // as of Go 1.20
69+
if allocs > maxAllocs {
70+
t.Errorf("allocs = %v; want max %v", allocs, maxAllocs)
71+
}
72+
}

cgosqlite/cgosqlite.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ package cgosqlite
5454
// #include "cgosqlite.h"
5555
import "C"
5656
import (
57+
"errors"
5758
"sync"
5859
"time"
5960
"unsafe"
@@ -99,6 +100,7 @@ type Stmt struct {
99100
// used as scratch space when calling into cgo
100101
rowid, changes C.sqlite3_int64
101102
duration C.int64_t
103+
encodedSize C.int
102104
}
103105

104106
// Open implements sqliteh.OpenFunc.
@@ -401,6 +403,23 @@ func (stmt *Stmt) ColumnDeclType(col int) string {
401403
return res
402404
}
403405

406+
func (stmt *Stmt) StepAllBinary(dstBuf []byte) (n int, err error) {
407+
if len(dstBuf) == 0 {
408+
return 0, errors.New("zero-length buffer to StepAllBinary")
409+
}
410+
ret := C.ts_sqlite_step_all(stmt.stmt, (*C.char)(unsafe.Pointer(&dstBuf[0])), C.int(len(dstBuf)), &stmt.encodedSize)
411+
412+
if int(stmt.encodedSize) > len(dstBuf) {
413+
return 0, sqliteh.BufferSizeTooSmallError{
414+
EncodedSize: int(stmt.encodedSize),
415+
}
416+
}
417+
if err := errCode(ret); err != nil {
418+
return 0, err
419+
}
420+
return int(stmt.encodedSize), nil
421+
}
422+
404423
var emptyCStr = C.CString("")
405424

406425
// errCode converts a SQLite result code returned through cgo into a Go error
// by way of sqliteh.CodeAsError.
func errCode(code C.int) error { return sqliteh.CodeAsError(sqliteh.Code(code)) }

cgosqlite/cgosqlite.h

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,92 @@ static int wal_callback_into_go(void *userData, sqlite3 *db, const char *dbName,
8888
// ts_sqlite3_wal_hook_go registers wal_callback_into_go as the WAL hook for
// db, with a null user-data pointer.
static void ts_sqlite3_wal_hook_go(sqlite3* db) {
8989
sqlite3_wal_hook(db, wal_callback_into_go, 0);
9090
}
91+
92+
// stepall_add_byte counts one more encoded byte in *encodedSize and, if the
// running total still fits in bufLen, stores b at bufBase[*pos] and advances
// *pos. Once the total exceeds bufLen nothing is written, but the size keeps
// being counted so the caller learns how much space is needed.
static void stepall_add_byte(char* bufBase, int bufLen, int* pos, int* encodedSize, char b) {
	*encodedSize += 1;
	if (*encodedSize <= bufLen) {
		bufBase[(*pos)++] = b;
	}
}
100+
101+
// stepall_add_int64 counts 8 more encoded bytes in *encodedSize and, if the
// running total still fits in bufLen, copies the 8 bytes of v in their
// in-memory (host) order to bufBase + *pos and advances *pos by 8.
static void stepall_add_int64(char* bufBase, int bufLen, int* pos, int* encodedSize, sqlite3_int64 v) {
	const char* src = (const char*)&v;

	*encodedSize += 8;
	if (*encodedSize > bufLen) {
		return;
	}
	char* dst = bufBase + *pos;
	for (int i = 0; i < 8; i++) {
		dst[i] = src[i];
	}
	*pos += 8;
}
111+
112+
// stepall_add_bytes appends a length-prefixed byte string: first vlen as an
// 8-byte integer (via stepall_add_int64), then the vlen bytes at v.
//
// *encodedSize is always advanced by the full 8 + vlen. The payload is only
// copied when the running total still fits in bufLen.
static void stepall_add_bytes(char* bufBase, int bufLen, int* pos, int* encodedSize, const char* v, int vlen) {
	stepall_add_int64(bufBase, bufLen, pos, encodedSize, vlen);

	(*encodedSize) += vlen;
	if (*encodedSize > bufLen) {
		return;
	}
	// Use memcpy, not strncpy: the payload is arbitrary binary data, and
	// strncpy would stop at the first NUL byte and zero-fill the rest.
	// Skip the call for empty values, where v may be NULL.
	if (vlen > 0) {
		memcpy(bufBase + *pos, v, (size_t)vlen);
	}
	(*pos) += vlen;
}
122+
123+
// ts_sqlite_step_all_encode_row encodes the current result row of stmt.
//
// The row is written as '(' followed by one tagged value per column and a
// closing ')'. Value tags are:
//   'i' + 8 bytes            INTEGER
//   'f' + 8 bytes            FLOAT (the bit pattern of the double)
//   'n'                      NULL
//   'b' + 8-byte len + data  TEXT or BLOB
// Bounds checking and size accounting are done by the stepall_add_* helpers.
static void ts_sqlite_step_all_encode_row(sqlite3_stmt* stmt, char* bufBase, int bufLen, int* pos, int* encodedSize) {
	stepall_add_byte(bufBase, bufLen, pos, encodedSize, '('); // start row
	int cols = sqlite3_column_count(stmt);

	for (int col = 0; col < cols; col++) {
		switch (sqlite3_column_type(stmt, col)) {
		case SQLITE_INTEGER:
			stepall_add_byte(bufBase, bufLen, pos, encodedSize, 'i'); // i for "integer"
			stepall_add_int64(bufBase, bufLen, pos, encodedSize, sqlite3_column_int64(stmt, col));
			break;
		case SQLITE_FLOAT: {
			double doubleVal = sqlite3_column_double(stmt, col);
			sqlite3_int64 bits;
			// Copy the bit pattern (ala math.Float64bits) with memcpy;
			// reading the double through an int64 pointer would break
			// C's strict-aliasing rules.
			memcpy(&bits, &doubleVal, sizeof bits);
			stepall_add_byte(bufBase, bufLen, pos, encodedSize, 'f'); // f for "float"
			stepall_add_int64(bufBase, bufLen, pos, encodedSize, bits);
			break;
		}
		case SQLITE_NULL:
			stepall_add_byte(bufBase, bufLen, pos, encodedSize, 'n'); // n for "null"
			break;
		case SQLITE_TEXT:
		case SQLITE_BLOB: {
			// Fetch the pointer first and the length second, as the SQLite
			// docs recommend. As call arguments their evaluation order
			// would be unspecified.
			const char* data = (const char*) sqlite3_column_text(stmt, col);
			int len = sqlite3_column_bytes(stmt, col);
			stepall_add_byte(bufBase, bufLen, pos, encodedSize, 'b'); // b for "blob" (but also used for TEXT)
			stepall_add_bytes(bufBase, bufLen, pos, encodedSize, data, len);
			break;
		}
		}
	}

	stepall_add_byte(bufBase, bufLen, pos, encodedSize, ')'); // end row
}
157+
158+
// encodedSize is initialized to zero and counts how much total space would be required,
159+
// even if bufLen is too small. Only a max of bufLen bytes are written to bufBase.
160+
static int ts_sqlite_step_all(sqlite3_stmt* stmt, char* bufBase, int bufLen, int* encodedSize) {
161+
*encodedSize = 0;
162+
if (bufLen < 1) {
163+
return SQLITE_ERROR;
164+
}
165+
int pos = 0;
166+
167+
while (1) {
168+
int err = sqlite3_step(stmt);
169+
if (err == SQLITE_DONE) {
170+
stepall_add_byte(bufBase, bufLen, &pos, encodedSize, 'E' /* 'E' for End */);
171+
return SQLITE_OK;
172+
}
173+
if (err == SQLITE_ROW) {
174+
ts_sqlite_step_all_encode_row(stmt, bufBase, bufLen, &pos, encodedSize);
175+
} else {
176+
return err;
177+
}
178+
}
179+
}

go.mod

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
module github.com/tailscale/sqlite
22

33
go 1.20
4+
5+
require (
6+
github.com/google/go-cmp v0.5.9
7+
golang.org/x/sys v0.6.0
8+
)

sqlite.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,7 @@ func (c *conn) Close() error {
184184
return reserr(c.db, "Conn.Close", "", c.db.Close())
185185
}
186186
func (c *conn) PrepareContext(ctx context.Context, query string) (driver.Stmt, error) {
187-
persist := ctx.Value(persistQuery{}) != nil
188-
return c.prepare(ctx, query, persist)
187+
return c.prepare(ctx, query, IsPersist(ctx))
189188
}
190189

191190
func (c *conn) prepare(ctx context.Context, query string, persist bool) (s *stmt, err error) {
@@ -868,5 +867,10 @@ func WithPersist(ctx context.Context) context.Context {
868867
return context.WithValue(ctx, persistQuery{}, persistQuery{})
869868
}
870869

870+
// IsPersist reports whether the context has the Persist key.
871+
func IsPersist(ctx context.Context) bool {
872+
return ctx.Value(persistQuery{}) != nil
873+
}
874+
871875
// persistQuery is used as a context value.
872876
type persistQuery struct{}

0 commit comments

Comments
 (0)