From 81d46ad402c8230d314c61b437641423ad6b46fe Mon Sep 17 00:00:00 2001 From: Marcelo Pires Date: Tue, 4 Aug 2020 20:38:19 +0100 Subject: [PATCH 1/2] add (dec)compress variants for reducing allocations --- compress.go | 37 +++++++++-- compress_test.go | 55 ++++++++++++++++ decompress.go | 161 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 249 insertions(+), 4 deletions(-) diff --git a/compress.go b/compress.go index 1f1756b..044618f 100644 --- a/compress.go +++ b/compress.go @@ -9,8 +9,8 @@ func appendMulti(out []byte, t int) []byte { return out } -func compress(in []byte) (out []byte, sz int) { - var m_off int +func compress(in []byte, out []byte) ([]byte, int) { + var m_off, sz int in_len := len(in) ip_len := in_len - m2_MAX_LEN - 5 dict := make([]int32, 1< 0 { @@ -176,3 +176,32 @@ func Compress1X(in []byte) (out []byte) { out = append(out, m4_MARKER|1, 0, 0) return } + +func Compress1XTo(in []byte, out []byte) []byte { + var t int + + in_len := len(in) + if in_len <= m2_MAX_LEN+5 { + t = in_len + } else { + out, t = compress(in, out) + } + + if t > 0 { + ii := in_len - t + if len(out) == 0 && t <= 238 { + out = append(out, byte(17+t)) + } else if t <= 3 { + out[len(out)-2] |= byte(t) + } else if t <= 18 { + out = append(out, byte(t-3)) + } else { + out = append(out, 0) + out = appendMulti(out, t-18) + } + out = append(out, in[ii:ii+t]...) 
+ } + + out = append(out, m4_MARKER|1, 0, 0) + return out +} diff --git a/compress_test.go b/compress_test.go index d6661d3..d9e6309 100644 --- a/compress_test.go +++ b/compress_test.go @@ -179,11 +179,38 @@ func BenchmarkComp(b *testing.B) { io.Copy(&buf, gz) b.ResetTimer() + b.ReportAllocs() for i := 0; i < b.N; i++ { Compress1X(buf.Bytes()) } } +func BenchmarkCompress1XTo(b *testing.B) { + f, err := os.Open("testdata/large.tar.gz") + if err != nil { + b.Fatal(err) + } + defer f.Close() + + gz, err := gzip.NewReader(f) + if err != nil { + b.Error(err) + return + } + defer gz.Close() + + var buf bytes.Buffer + io.Copy(&buf, gz) + + b.ResetTimer() + + var out = make([]byte, buf.Len()) + b.ReportAllocs() + for i := 0; i < b.N; i++ { + out = Compress1XTo(buf.Bytes(), out[:0]) + } +} + func BenchmarkDecomp(b *testing.B) { f, err := os.Open("testdata/large.tar.gz") if err != nil { @@ -203,7 +230,35 @@ func BenchmarkDecomp(b *testing.B) { cmp := Compress1X(buf.Bytes()) b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { Decompress1X(bytes.NewReader(cmp), len(cmp), buf.Len()) } } + +func BenchmarkDecompTo(b *testing.B) { + f, err := os.Open("testdata/large.tar.gz") + if err != nil { + b.Fatal(err) + } + defer f.Close() + + gz, err := gzip.NewReader(f) + if err != nil { + b.Error(err) + return + } + defer gz.Close() + + var buf bytes.Buffer + io.Copy(&buf, gz) + + cmp := Compress1X(buf.Bytes()) + out := make([]byte, 0, buf.Len()) + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + Decompress1XTo(bytes.NewReader(cmp), len(cmp), buf.Len(), out) + } +} diff --git a/decompress.go b/decompress.go index 4f83850..903cf7e 100644 --- a/decompress.go +++ b/decompress.go @@ -288,3 +288,164 @@ match_end: ip = in.ReadU8() goto begin_loop } + +// Decompress an input compressed with LZO1X. +// +// LZO1X has a stream terminator marker, so the decompression will always stop +// when this marker is found. 
+// +// If inLen is not zero, it is expected to match the length of the compressed +// input stream, and it is used to limit reads from the underlying reader; if +// inLen is smaller than the real stream, the decompression will abort with an +// error; if inLen is larger than the real stream, or if it is zero, the +// decompression will succeed but more bytes than necessary might be read +// from the underlying reader. If the reader returns EOF before the termination +// marker is found, the decompression aborts and EOF is returned. +// +// outLen is optional; if it is larger than cap(o), a new output buffer with +// that capacity is allocated; otherwise o is reused, starting from length zero. +func Decompress1XTo(r io.Reader, inLen int, outLen int, o []byte) (out []byte, err error) { + var t, m_pos int + var last2 byte + + defer func() { + // To gain performance, we don't do any bounds checking while reading + // the input, so if the decompressor reads past the end of the input + // stream, a runtime error is raised. This saves about 7% of performance + // as the reading functions are very hot in the decompressor. 
+ if r := recover(); r != nil { + if re, ok := r.(runtime.Error); ok { + if strings.Contains(re.Error(), "index out of range") { + err = io.EOF + return + } + } + panic(r) + } + }() + + if cap(o) < outLen { + out = make([]byte, 0, outLen) + } else { + out = o[:0] + } + + in := newReader(r, inLen) + ip := in.ReadU8() + if ip > 17 { + t = int(ip) - 17 + if t < 4 { + goto match_next + } + in.ReadAppend(&out, t) + // fmt.Println("begin:", string(out)) + goto first_literal_run + } + +begin_loop: + t = int(ip) + if t >= 16 { + goto match + } + if t == 0 { + t = in.ReadMulti(15) + } + in.ReadAppend(&out, t+3) + // fmt.Println("readappend", t+3, string(out[len(out)-t-3:])) +first_literal_run: + ip = in.ReadU8() + last2 = ip + t = int(ip) + if t >= 16 { + goto match + } + m_pos = len(out) - (1 + m2_MAX_OFFSET) + m_pos -= t >> 2 + ip = in.ReadU8() + m_pos -= int(ip) << 2 + // fmt.Println("m_pos flr", m_pos, len(out), "\n", string(out)) + if m_pos < 0 { + err = LookBehindUnderrun + return + } + copyMatch(&out, m_pos, 3) + goto match_done + +match: + in.Rebuffer() + if in.Err != nil { + err = in.Err + return + } + t = int(ip) + last2 = ip + if t >= 64 { + m_pos = len(out) - 1 + m_pos -= (t >> 2) & 7 + ip = in.ReadU8() + m_pos -= int(ip) << 3 + // fmt.Println("m_pos t64", m_pos, t, int(ip)) + t = (t >> 5) - 1 + goto copy_match + } else if t >= 32 { + t &= 31 + if t == 0 { + t = in.ReadMulti(31) + } + m_pos = len(out) - 1 + v16 := in.ReadU16() + m_pos -= v16 >> 2 + last2 = byte(v16 & 0xFF) + // fmt.Println("m_pos t32", m_pos) + } else if t >= 16 { + m_pos = len(out) + m_pos -= (t & 8) << 11 + t &= 7 + if t == 0 { + t = in.ReadMulti(7) + } + v16 := in.ReadU16() + m_pos -= v16 >> 2 + if m_pos == len(out) { + // fmt.Println("END", t, v16, m_pos) + return + } + m_pos -= 0x4000 + last2 = byte(v16 & 0xFF) + // fmt.Println("m_pos t16", m_pos) + } else { + m_pos = len(out) - 1 + m_pos -= t >> 2 + ip = in.ReadU8() + m_pos -= int(ip) << 2 + if m_pos < 0 { + err = LookBehindUnderrun + 
return + } + // fmt.Println("m_pos tX", m_pos) + copyMatch(&out, m_pos, 2) + goto match_done + } + +copy_match: + if m_pos < 0 { + err = LookBehindUnderrun + return + } + copyMatch(&out, m_pos, t+2) + +match_done: + t = int(last2 & 3) + if t == 0 { + goto match_end + } +match_next: + // fmt.Println("read append finale:", t) + in.ReadAppend(&out, t) + ip = in.ReadU8() + goto match + +match_end: + ip = in.ReadU8() + goto begin_loop +} From 2a559aae1a0a6f00adfbbdb6b45cf9920bc33c01 Mon Sep 17 00:00:00 2001 From: Marcelo Pires Date: Wed, 5 Aug 2020 11:29:55 +0100 Subject: [PATCH 2/2] increase test timeout --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 6ae7321..1ba8376 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -20,6 +20,6 @@ jobs: - name: Lint and Test via ${{ matrix.goVer }} run: | go mod tidy -v && git diff --no-patch --exit-code - go test -v -race -cover -timeout=20m -coverpkg=./... -covermode=atomic -coverprofile=coverage.txt ./... && bash <(curl -s https://codecov.io/bash) + go test -v -race -cover -timeout=29m -coverpkg=./... -covermode=atomic -coverprofile=coverage.txt ./... && bash <(curl -s https://codecov.io/bash) - name: Build on ${{ matrix.goVer }} run: go build ./...