From d2e0ec0e5436bb50666ff2c6ae56e56941771cc6 Mon Sep 17 00:00:00 2001 From: agelloz Date: Tue, 12 Apr 2022 19:29:48 +0200 Subject: [PATCH 01/29] first --- Makefile | 2 +- bow.go | 8 +- bowappend.gen.go.tmpl | 63 ----- bowappend.gen.go => bowappend.go | 9 +- bowassertion.go | 2 +- bowbuffer.gen.go | 190 --------------- bowbuffer.gen.go.tmpl | 133 ----------- bowbuffer.go | 183 +++++++++++++- bowfill.go | 2 +- bowgetters.go | 14 +- bowjoin.gen.go | 397 ------------------------------- bowjoin.gen.go.tmpl | 184 -------------- bowjoin.go | 390 ++++++++++++++++++++++++++++++ bowmetadata.go | 4 +- bowparquet.go | 329 ++++--------------------- bowparquet_test.go | 47 +--- bowrecord.go | 12 +- bowseries.gen.go | 151 ------------ bowseries.gen.go.tmpl | 85 ------- bowseries.gen_test.go | 20 -- bowseries.go | 144 ++++++++++- bowseries_test.go | 15 ++ bowstring.go | 2 +- bowtypes.go | 36 ++- bowvalues.go | 2 +- datatypes.yml | 16 -- doc.go | 6 - go.mod | 27 ++- go.sum | 191 ++------------- 29 files changed, 902 insertions(+), 1762 deletions(-) delete mode 100644 bowappend.gen.go.tmpl rename bowappend.gen.go => bowappend.go (93%) delete mode 100644 bowbuffer.gen.go delete mode 100644 bowbuffer.gen.go.tmpl delete mode 100644 bowjoin.gen.go delete mode 100644 bowjoin.gen.go.tmpl delete mode 100644 bowseries.gen.go delete mode 100644 bowseries.gen.go.tmpl delete mode 100644 bowseries.gen_test.go delete mode 100644 datatypes.yml delete mode 100644 doc.go diff --git a/Makefile b/Makefile index 8113222..d46f5ac 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ gen: @go generate $(PKG) lint: - golangci-lint run -E gofmt --fix -v $(PKG) + golangci-lint run -E gofmt,gci --fix -v $(PKG) count: @bash -c $(PWD)/scripts/count-code-lines.sh diff --git a/bow.go b/bow.go index f9e755f..733fd09 100644 --- a/bow.go +++ b/bow.go @@ -6,8 +6,8 @@ import ( "reflect" "time" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" ) // Bow is a wrapper of Apache Arrow array.Record interface. @@ -145,7 +145,7 @@ func NewBowFromColBasedInterfaces(colNames []string, colTypes []Type, colData [] func NewBowFromRowBasedInterfaces(colNames []string, colTypes []Type, rowBasedData [][]interface{}) (Bow, error) { if len(colNames) != len(colTypes) { return nil, errors.New( - "bow.NewBowFromRowBasedInterfaces: mismatch between colNames and colTypes len") + "mismatch between colNames and colTypes len") } bufSlice := make([]Buffer, len(colNames)) @@ -156,7 +156,7 @@ func NewBowFromRowBasedInterfaces(colNames []string, colTypes []Type, rowBasedDa for rowIndex, row := range rowBasedData { if len(row) != len(colNames) { return nil, errors.New( - "bow.NewBowFromRowBasedInterfaces: mismatch between colNames and row lengths") + "mismatch between colNames and row lengths") } for colIndex := range colNames { diff --git a/bowappend.gen.go.tmpl b/bowappend.gen.go.tmpl deleted file mode 100644 index 15ef2eb..0000000 --- a/bowappend.gen.go.tmpl +++ /dev/null @@ -1,63 +0,0 @@ -package bow - -import ( - "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/memory" -) - -// AppendBows attempts to append bows with equal schemas. -// Different schemas will lead to undefined behavior. -// Resulting metadata is copied from the first bow. -func AppendBows(bows ...Bow) (Bow, error) { - if len(bows) == 0 { - return nil, nil - } - - if len(bows) == 1 { - return bows[0], nil - } - - numRows := 0 - for _, b := range bows { - numRows += b.NumRows() - } - - refBow := bows[0] - seriesSlice := make([]Series, refBow.NumCols()) - - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - for colIndex := 0; colIndex < refBow.NumCols(); colIndex++ { - var newArray arrow.Array - refType := refBow.ColumnType(colIndex) - switch refType { - {{range .Data.types -}} - case {{ .Type }}: - builder := array.New{{ .Type }}Builder(mem) - builder.Resize(numRows) - for _, b := range bows { - if colType := b.ColumnType(colIndex); colType != refType { - return nil, fmt.Errorf( - "bow.AppendBows: incompatible types %v and %v", refType, colType) - } - data := b.(*bow).Column(colIndex).Data() - arr := array.New{{ .Type }}Data(data) - v := {{ .Type }}Values(arr) - valid := getValiditySlice(arr) - builder.AppendValues(v, valid) - } - newArray = builder.NewArray() - {{end -}} - default: - return nil, fmt.Errorf("unsupported type %v", refType) - } - - seriesSlice[colIndex] = Series{ - Name: refBow.ColumnName(colIndex), - Array: newArray, - } - } - - return NewBowWithMetadata(refBow.Metadata(), seriesSlice...) -} diff --git a/bowappend.gen.go b/bowappend.go similarity index 93% rename from bowappend.gen.go rename to bowappend.go index 01280eb..dd898d7 100644 --- a/bowappend.gen.go +++ b/bowappend.go @@ -1,12 +1,11 @@ -// Code generated by bowappend.gen.go.tmpl. DO NOT EDIT. - package bow import ( "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/memory" + + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/memory" ) // AppendBows attempts to append bows with equal schemas. diff --git a/bowassertion.go b/bowassertion.go index ac952e1..2bcc297 100644 --- a/bowassertion.go +++ b/bowassertion.go @@ -1,7 +1,7 @@ package bow import ( - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow/array" ) const ( diff --git a/bowbuffer.gen.go b/bowbuffer.gen.go deleted file mode 100644 index 3e59ee3..0000000 --- a/bowbuffer.gen.go +++ /dev/null @@ -1,190 +0,0 @@ -// Code generated by bowbuffer.gen.go.tmpl. DO NOT EDIT. - -package bow - -import ( - "fmt" - - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/bitutil" -) - -func NewBuffer(size int, typ Type) Buffer { - switch typ { - case Int64: - return Buffer{ - Data: make([]int64, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } - case Float64: - return Buffer{ - Data: make([]float64, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } - case Boolean: - return Buffer{ - Data: make([]bool, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } - case String: - return Buffer{ - Data: make([]string, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } - default: - panic(fmt.Errorf("unsupported type %s", typ)) - } -} - -func NewBufferFromData(data interface{}) Buffer { - var l int - switch data.(type) { - case []int64: - case []float64: - case []bool: - case []string: - default: - panic(fmt.Errorf("unhandled type %T", data)) - } - return Buffer{ - Data: data, - nullBitmapBytes: buildNullBitmapBytes(l, nil), - } -} - -func (b Buffer) Len() int { - switch data := b.Data.(type) { - case []int64: - return len(data) - case []float64: - return len(data) - case []bool: - return len(data) - case []string: - return len(data) - default: - panic(fmt.Errorf("unsupported type '%T'", b.Data)) - } -} - -func (b *Buffer) SetOrDrop(i int, value interface{}) { - var valid bool - switch v := b.Data.(type) { - case []int64: - v[i], valid = Int64.Convert(value).(int64) - case []float64: - v[i], valid = Float64.Convert(value).(float64) - case []bool: - v[i], valid = Boolean.Convert(value).(bool) - case []string: - v[i], valid = String.Convert(value).(string) - default: - panic(fmt.Errorf("unsupported type %T", v)) - } - - if valid { - bitutil.SetBit(b.nullBitmapBytes, i) - } else { - bitutil.ClearBit(b.nullBitmapBytes, i) - } -} - -func (b *Buffer) SetOrDropStrict(i int, value interface{}) { - var valid bool - switch v := b.Data.(type) { - case []int64: - v[i], valid = value.(int64) - case []float64: - v[i], valid = value.(float64) - case []bool: - v[i], valid = value.(bool) - case []string: - v[i], valid = value.(string) - default: - panic(fmt.Errorf("unsupported type %T", v)) - } - - if valid { - bitutil.SetBit(b.nullBitmapBytes, i) - } else { - bitutil.ClearBit(b.nullBitmapBytes, i) - } -} - -func (b *Buffer) GetValue(i int) interface{} { - if bitutil.BitIsNotSet(b.nullBitmapBytes, i) { - return nil - } - switch v := b.Data.(type) { - case []int64: - return v[i] - case []float64: - return v[i] - case []bool: - return v[i] - case []string: - return v[i] - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -func (b Buffer) Less(i, j int) bool { - switch v := b.Data.(type) { - case []int64: - return v[i] < v[j] - case []float64: - return v[i] < v[j] - case []string: - return v[i] < v[j] - case []bool: - return !v[i] && v[j] - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -func (b *bow) NewBufferFromCol(colIndex int) Buffer { - data := b.Column(colIndex).Data() - switch b.ColumnType(colIndex) { - case Int64: - arr := array.NewInt64Data(data) - nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] - nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) - copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: Int64Values(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } - case Float64: - arr := array.NewFloat64Data(data) - nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] - nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) - copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: Float64Values(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } - case Boolean: - arr := array.NewBooleanData(data) - nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] - nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) - copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: BooleanValues(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } - case String: - arr := array.NewStringData(data) - nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] - nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) - copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: StringValues(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } - default: - panic(fmt.Errorf( - "unsupported type %+v", b.ColumnType(colIndex))) - } -} diff --git a/bowbuffer.gen.go.tmpl b/bowbuffer.gen.go.tmpl deleted file mode 100644 index 358bd20..0000000 --- a/bowbuffer.gen.go.tmpl +++ /dev/null @@ -1,133 +0,0 @@ -package bow - -import ( - "fmt" - - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/bitutil" -) - -func NewBuffer(size int, typ Type) Buffer { - switch typ { - {{range .Data.types -}} - case {{ .Type }}: - return Buffer{ - Data: make([]{{ .type }}, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size) / 8), - } - {{end -}} - default: - panic(fmt.Errorf("unsupported type %s", typ)) - } -} - -func NewBufferFromData(data interface{}) Buffer { - var l int - switch data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - {{end -}} - default: - panic(fmt.Errorf("unhandled type %T", data)) - } - return Buffer{ - Data: data, - nullBitmapBytes: buildNullBitmapBytes(l, nil), - } -} - -func (b Buffer) Len() int { - switch data := b.Data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - return len(data) - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%T'", b.Data)) - } -} - -func (b *Buffer) SetOrDrop(i int, value interface{}) { - var valid bool - switch v := b.Data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - v[i], valid = {{ .Type }}.Convert(value).({{ .type }}) - {{end -}} - default: - panic(fmt.Errorf("unsupported type %T", v)) - } - - if valid { - bitutil.SetBit(b.nullBitmapBytes, i) - } else { - bitutil.ClearBit(b.nullBitmapBytes, i) - } -} - -func (b *Buffer) SetOrDropStrict(i int, value interface{}) { - var valid bool - switch v := b.Data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - v[i], valid = value.({{ .type }}) - {{end -}} - default: - panic(fmt.Errorf("unsupported type %T", v)) - } - - if valid { - bitutil.SetBit(b.nullBitmapBytes, i) - } else { - bitutil.ClearBit(b.nullBitmapBytes, i) - } -} - -func (b *Buffer) GetValue(i int) interface{} { - if bitutil.BitIsNotSet(b.nullBitmapBytes, i) { - return nil - } - switch v := b.Data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - return v[i] - {{end -}} - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -func (b Buffer) Less(i, j int) bool { - switch v := b.Data.(type) { - {{range .Data.types -}} - {{if .comparable -}} - case []{{ .type }}: - return v[i] < v[j] - {{end -}} - {{end -}} - case []bool: - return !v[i] && v[j] - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -func (b *bow) NewBufferFromCol(colIndex int) Buffer { - data := b.Column(colIndex).Data() - switch b.ColumnType(colIndex) { - {{range .Data.types -}} - case {{ .Type }}: - arr := array.New{{ .Type }}Data(data) - nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] - nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) - copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: {{ .Type }}Values(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } - {{end -}} - default: - panic(fmt.Errorf( - "unsupported type %+v", b.ColumnType(colIndex))) - } -} diff --git a/bowbuffer.go b/bowbuffer.go index 26dd72a..61b02c9 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -4,7 +4,8 @@ import ( "fmt" "sort" - "github.com/apache/arrow/go/v7/arrow/bitutil" + "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/bitutil" ) type Buffer struct { @@ -12,6 +13,186 @@ type Buffer struct { nullBitmapBytes []byte } +func NewBuffer(size int, typ Type) Buffer { + switch typ { + case Int64: + return Buffer{ + Data: make([]int64, size), + nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), + } + case Float64: + return Buffer{ + Data: make([]float64, size), + nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), + } + case Boolean: + return Buffer{ + Data: make([]bool, size), + nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), + } + case String: + return Buffer{ + Data: make([]string, size), + nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), + } + default: + panic(fmt.Errorf("unsupported type %s", typ)) + } +} + +func NewBufferFromData(data interface{}) Buffer { + var l int + switch data.(type) { + case []int64: + case []float64: + case []bool: + case []string: + default: + panic(fmt.Errorf("unhandled type %T", data)) + } + return Buffer{ + Data: data, + nullBitmapBytes: buildNullBitmapBytes(l, nil), + } +} + +func (b Buffer) Len() int { + switch data := b.Data.(type) { + case []int64: + return len(data) + case []float64: + return len(data) + case []bool: + return len(data) + case []string: + return len(data) + default: + panic(fmt.Errorf("unsupported type '%T'", b.Data)) + } +} + +func (b *Buffer) SetOrDrop(i int, value interface{}) { + var valid bool + switch v := b.Data.(type) { + case []int64: + v[i], valid = Int64.Convert(value).(int64) + case []float64: + v[i], valid = Float64.Convert(value).(float64) + case []bool: + v[i], valid = Boolean.Convert(value).(bool) + case []string: + v[i], valid = String.Convert(value).(string) + default: + panic(fmt.Errorf("unsupported type %T", v)) + } + + if valid { + bitutil.SetBit(b.nullBitmapBytes, i) + } else { + bitutil.ClearBit(b.nullBitmapBytes, i) + } +} + +func (b *Buffer) SetOrDropStrict(i int, value interface{}) { + var valid bool + switch v := b.Data.(type) { + case []int64: + v[i], valid = value.(int64) + case []float64: + v[i], valid = value.(float64) + case []bool: + v[i], valid = value.(bool) + case []string: + v[i], valid = value.(string) + default: + panic(fmt.Errorf("unsupported type %T", v)) + } + + if valid { + bitutil.SetBit(b.nullBitmapBytes, i) + } else { + bitutil.ClearBit(b.nullBitmapBytes, i) + } +} + +func (b *Buffer) GetValue(i int) interface{} { + if bitutil.BitIsNotSet(b.nullBitmapBytes, i) { + return nil + } + switch v := b.Data.(type) { + case []int64: + return v[i] + case []float64: + return v[i] + case []bool: + return v[i] + case []string: + return v[i] + default: + panic(fmt.Errorf("unsupported type %T", v)) + } +} + +func (b Buffer) Less(i, j int) bool { + switch v := b.Data.(type) { + case []int64: + return v[i] < v[j] + case []float64: + return v[i] < v[j] + case []string: + return v[i] < v[j] + case []bool: + return !v[i] && v[j] + default: + panic(fmt.Errorf("unsupported type %T", v)) + } +} + +func (b *bow) NewBufferFromCol(colIndex int) Buffer { + data := b.Column(colIndex).Data() + switch b.ColumnType(colIndex) { + case Int64: + arr := array.NewInt64Data(data) + nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] + nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) + copy(nullBitmapBytesCopy, nullBitmapBytes) + return Buffer{ + Data: Int64Values(arr), + nullBitmapBytes: nullBitmapBytesCopy, + } + case Float64: + arr := array.NewFloat64Data(data) + nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] + nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) + copy(nullBitmapBytesCopy, nullBitmapBytes) + return Buffer{ + Data: Float64Values(arr), + nullBitmapBytes: nullBitmapBytesCopy, + } + case Boolean: + arr := array.NewBooleanData(data) + nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] + nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) + copy(nullBitmapBytesCopy, nullBitmapBytes) + return Buffer{ + Data: BooleanValues(arr), + nullBitmapBytes: nullBitmapBytesCopy, + } + case String: + arr := array.NewStringData(data) + nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] + nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) + copy(nullBitmapBytesCopy, nullBitmapBytes) + return Buffer{ + Data: StringValues(arr), + nullBitmapBytes: nullBitmapBytesCopy, + } + default: + panic(fmt.Errorf( + "unsupported type %+v", b.ColumnType(colIndex))) + } +} + func buildNullBitmapBytes(dataLength int, validityArray interface{}) []byte { var res []byte nullBitmapLength := bitutil.CeilByte(dataLength) / 8 diff --git a/bowfill.go b/bowfill.go index 1bc51e7..9004a5a 100644 --- a/bowfill.go +++ b/bowfill.go @@ -5,7 +5,7 @@ import ( "math" "sync" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow/array" ) // FillLinear fills the column toFillColIndex using the Linear interpolation method according diff --git a/bowgetters.go b/bowgetters.go index 01fe9f3..60014e8 100644 --- a/bowgetters.go +++ b/bowgetters.go @@ -4,8 +4,8 @@ import ( "fmt" "sort" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" ) func (b *bow) GetRow(rowIndex int) map[string]interface{} { @@ -35,6 +35,14 @@ func (b *bow) GetValue(colIndex, rowIndex int) interface{} { return array.NewBooleanData(b.Column(colIndex).Data()).Value(rowIndex) case String: return array.NewStringData(b.Column(colIndex).Data()).Value(rowIndex) + case TimestampSec: + return array.NewTimestampData(b.Column(colIndex).Data()).Value(rowIndex) + case TimestampMilli: + return array.NewTimestampData(b.Column(colIndex).Data()).Value(rowIndex) + case TimestampMicro: + return array.NewTimestampData(b.Column(colIndex).Data()).Value(rowIndex) + case TimestampNano: + return array.NewTimestampData(b.Column(colIndex).Data()).Value(rowIndex) default: panic(fmt.Errorf("unsupported type '%s'", b.ColumnType(colIndex))) } @@ -255,7 +263,7 @@ func (b *bow) GetPrevFloat64(colIndex, rowIndex int) (float64, int) { } func (b *bow) ColumnType(colIndex int) Type { - return getBowTypeFromArrowType(b.Schema().Field(colIndex).Type) + return getBowTypeFromArrowTypeFingerprint(b.Schema().Field(colIndex).Type) } func (b *bow) ColumnIndex(colName string) (int, error) { diff --git a/bowjoin.gen.go b/bowjoin.gen.go deleted file mode 100644 index 418e8cf..0000000 --- a/bowjoin.gen.go +++ /dev/null @@ -1,397 +0,0 @@ -// Code generated by bowjoin.gen.go.tmpl. DO NOT EDIT. - -package bow - -import ( - "fmt" - - "github.com/apache/arrow/go/v7/arrow/array" -) - -func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, - commonRows struct{ l, r []int }) { - - for colIndex := 0; colIndex < left.NumCols(); colIndex++ { - buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) - switch left.ColumnType(colIndex) { - case Int64: - data := array.NewInt64Data(left.Column(colIndex).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.l[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) - } - } - case Float64: - data := array.NewFloat64Data(left.Column(colIndex).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.l[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) - } - } - case Boolean: - data := array.NewBooleanData(left.Column(colIndex).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.l[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) - } - } - case String: - data := array.NewStringData(left.Column(colIndex).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.l[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) - } - } - default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) - } -} - -func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, newNumCols int, - commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { - var rightCol int - - for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { - buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) - for commonCols[right.ColumnName(rightCol)] != nil { - rightCol++ - } - - // Fill common rows from right bow - switch right.ColumnType(rightCol) { - case Int64: - data := array.NewInt64Data(right.Column(rightCol).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.r[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) - } - } - case Float64: - data := array.NewFloat64Data(right.Column(rightCol).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.r[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) - } - } - case Boolean: - data := array.NewBooleanData(right.Column(rightCol).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.r[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) - } - } - case String: - data := array.NewStringData(right.Column(rightCol).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.r[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) - } - } - default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) - rightCol++ - } -} - -func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uniquesLeft int, - commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { - var leftRow, commonRow int - - for colIndex := 0; colIndex < left.NumCols(); colIndex++ { - leftRow = 0 - commonRow = 0 - buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) - - // Fill rows from left bow - switch left.ColumnType(colIndex) { - case Int64: - data := array.NewInt64Data(left.Column(colIndex).Data()) - for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - if leftRow++; leftRow >= left.NumRows() { - break - } - } - case Float64: - data := array.NewFloat64Data(left.Column(colIndex).Data()) - for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - if leftRow++; leftRow >= left.NumRows() { - break - } - } - case Boolean: - data := array.NewBooleanData(left.Column(colIndex).Data()) - for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - if leftRow++; leftRow >= left.NumRows() { - break - } - } - case String: - data := array.NewStringData(left.Column(colIndex).Data()) - for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - if leftRow++; leftRow >= left.NumRows() { - break - } - } - default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) - } - - // Fill remaining rows from right bow if column is common - _, isColCommon := commonCols[left.ColumnName(colIndex)] - var newRow int - if isColCommon { - newRow = left.NumRows() + len(commonRows.l) - uniquesLeft - } - for rightRow := 0; isColCommon && rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - buf.SetOrDropStrict(newRow, commonCols[left.ColumnName(colIndex)][1].GetValue(rightRow)) - newRow++ - } - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) - } -} - -func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, - newNumRows, uniquesLeft int, commonCols map[string][]Buffer, - commonRows struct{ l, r []int }) { - var leftRow, commonRow, rightCol int - - for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { - leftRow = 0 - commonRow = 0 - for commonCols[right.ColumnName(rightCol)] != nil { - rightCol++ - } - buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) - - switch right.ColumnType(rightCol) { - case Int64: - data := array.NewInt64Data(right.Column(rightCol).Data()) - - // Fill common rows from right bow - for newRow := 0; newRow < newNumRows; newRow++ { - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(commonRows.r[commonRow]) { - buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - leftRow++ - } - - // Fill remaining rows from right bow - newRow := left.NumRows() + len(commonRows.r) - uniquesLeft - for rightRow := 0; rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - if data.IsValid(rightRow) { - buf.SetOrDropStrict(newRow, data.Value(rightRow)) - } - newRow++ - } - } - case Float64: - data := array.NewFloat64Data(right.Column(rightCol).Data()) - - // Fill common rows from right bow - for newRow := 0; newRow < newNumRows; newRow++ { - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(commonRows.r[commonRow]) { - buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - leftRow++ - } - - // Fill remaining rows from right bow - newRow := left.NumRows() + len(commonRows.r) - uniquesLeft - for rightRow := 0; rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - if data.IsValid(rightRow) { - buf.SetOrDropStrict(newRow, data.Value(rightRow)) - } - newRow++ - } - } - case Boolean: - data := array.NewBooleanData(right.Column(rightCol).Data()) - - // Fill common rows from right bow - for newRow := 0; newRow < newNumRows; newRow++ { - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(commonRows.r[commonRow]) { - buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - leftRow++ - } - - // Fill remaining rows from right bow - newRow := left.NumRows() + len(commonRows.r) - uniquesLeft - for rightRow := 0; rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - if data.IsValid(rightRow) { - buf.SetOrDropStrict(newRow, data.Value(rightRow)) - } - newRow++ - } - } - case String: - data := array.NewStringData(right.Column(rightCol).Data()) - - // Fill common rows from right bow - for newRow := 0; newRow < newNumRows; newRow++ { - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(commonRows.r[commonRow]) { - buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - leftRow++ - } - - // Fill remaining rows from right bow - newRow := left.NumRows() + len(commonRows.r) - uniquesLeft - for rightRow := 0; rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - if data.IsValid(rightRow) { - buf.SetOrDropStrict(newRow, data.Value(rightRow)) - } - newRow++ - } - } - default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) - } - (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) - rightCol++ - } -} diff --git a/bowjoin.gen.go.tmpl b/bowjoin.gen.go.tmpl deleted file mode 100644 index 6aef1cd..0000000 --- a/bowjoin.gen.go.tmpl +++ /dev/null @@ -1,184 +0,0 @@ -package bow - -import ( - "fmt" - - "github.com/apache/arrow/go/v7/arrow/array" -) - -func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, - commonRows struct{ l, r []int }) { - - for colIndex := 0; colIndex < left.NumCols(); colIndex++ { - buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) - switch left.ColumnType(colIndex) { - {{range .Data.types -}} - case {{ .Type }}: - data := array.New{{ .Type }}Data(left.Column(colIndex).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.l[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) - } - } - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) - } -} - -func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, newNumCols int, - commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { - var rightCol int - - for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { - buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) - for commonCols[right.ColumnName(rightCol)] != nil { - rightCol++ - } - - // Fill common rows from right bow - switch right.ColumnType(rightCol) { - {{range .Data.types -}} - case {{ .Type }}: - data := array.New{{ .Type }}Data(right.Column(rightCol).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.r[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) - } - } - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) - rightCol++ - } -} - -func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uniquesLeft int, - commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { - var leftRow, commonRow int - - for colIndex := 0; colIndex < left.NumCols(); colIndex++ { - leftRow = 0 - commonRow = 0 - buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) - - // Fill rows from left bow - switch left.ColumnType(colIndex) { - {{range .Data.types -}} - case {{ .Type }}: - data := array.New{{ .Type }}Data(left.Column(colIndex).Data()) - for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - if leftRow++; leftRow >= left.NumRows() { - break - } - } - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) - } - - // Fill remaining rows from right bow if column is common - _, isColCommon := commonCols[left.ColumnName(colIndex)] - var newRow int - if isColCommon { - newRow = left.NumRows() + len(commonRows.l) - uniquesLeft - } - for rightRow := 0; isColCommon && rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - buf.SetOrDropStrict(newRow, commonCols[left.ColumnName(colIndex)][1].GetValue(rightRow)) - newRow++ - } - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) - } -} - -func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, - newNumRows, uniquesLeft int, commonCols map[string][]Buffer, - commonRows struct{ l, r []int }) { - var leftRow, commonRow, rightCol int - - for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { - leftRow = 0 - commonRow = 0 - for commonCols[right.ColumnName(rightCol)] != nil { - rightCol++ - } - buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) - - switch right.ColumnType(rightCol) { - {{range .Data.types -}} - case {{ .Type }}: - data := array.New{{ .Type }}Data(right.Column(rightCol).Data()) - - // Fill common rows from right bow - for newRow := 0; newRow < newNumRows; newRow++ { - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(commonRows.r[commonRow]) { - buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - leftRow++ - } - - // Fill remaining rows from right bow - newRow := left.NumRows() + len(commonRows.r) - uniquesLeft - for rightRow := 0; rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - if data.IsValid(rightRow) { - buf.SetOrDropStrict(newRow, data.Value(rightRow)) - } - newRow++ - } - } - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) - } - (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) - rightCol++ - } -} diff --git a/bowjoin.go b/bowjoin.go index aaa96ec..7cb947e 100644 --- a/bowjoin.go +++ b/bowjoin.go @@ -3,6 +3,8 @@ package bow import ( "fmt" "sort" + + "github.com/apache/arrow/go/v8/arrow/array" ) // InnerJoin joins columns of two Bows on common columns and rows. @@ -182,3 +184,391 @@ func getCommonRows(left, right Bow, commonColBufs map[string][]Buffer) CommonRow return commonRows } + +func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, + commonRows struct{ l, r []int }) { + + for colIndex := 0; colIndex < left.NumCols(); colIndex++ { + buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) + switch left.ColumnType(colIndex) { + case Int64: + data := array.NewInt64Data(left.Column(colIndex).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.l[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) + } + } + case Float64: + data := array.NewFloat64Data(left.Column(colIndex).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.l[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) + } + } + case Boolean: + data := array.NewBooleanData(left.Column(colIndex).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.l[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) + } + } + case String: + data := array.NewStringData(left.Column(colIndex).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.l[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) + } + } + default: + panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + } + + (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) + } +} + +func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, newNumCols int, + commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { + var rightCol int + + for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { + buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) + for commonCols[right.ColumnName(rightCol)] != nil { + rightCol++ + } + + // Fill common rows from right bow + switch right.ColumnType(rightCol) { + case Int64: + data := array.NewInt64Data(right.Column(rightCol).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.r[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) + } + } + case Float64: + data := array.NewFloat64Data(right.Column(rightCol).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.r[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) + } + } + case Boolean: + data := array.NewBooleanData(right.Column(rightCol).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.r[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) + } + } + case String: + data := array.NewStringData(right.Column(rightCol).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.r[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) + } + } + default: + panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + } + + (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) + rightCol++ + } +} + +func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uniquesLeft int, + commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { + var leftRow, commonRow int + + for colIndex := 0; colIndex < left.NumCols(); colIndex++ { + leftRow = 0 + commonRow = 0 + buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) + + // Fill rows from left bow + switch left.ColumnType(colIndex) { + case Int64: + data := array.NewInt64Data(left.Column(colIndex).Data()) + for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + if leftRow++; leftRow >= left.NumRows() { + break + } + } + case Float64: + data := array.NewFloat64Data(left.Column(colIndex).Data()) + for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + if leftRow++; leftRow >= left.NumRows() { + break + } + } + case Boolean: + data := array.NewBooleanData(left.Column(colIndex).Data()) + for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + if leftRow++; leftRow >= left.NumRows() { + break + } + } + case String: + data := array.NewStringData(left.Column(colIndex).Data()) + for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + if leftRow++; leftRow >= left.NumRows() { + break + } + } + default: + panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + } + + // Fill remaining rows from right bow if column is common + _, isColCommon := commonCols[left.ColumnName(colIndex)] + var newRow int + if isColCommon { + newRow = left.NumRows() + len(commonRows.l) - uniquesLeft + } + for rightRow := 0; isColCommon && rightRow < right.NumRows(); rightRow++ { + var isRowCommon bool + for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { + if rightRow == commonRows.r[commonRow] { + isRowCommon = true + break + } + } + if !isRowCommon { + buf.SetOrDropStrict(newRow, commonCols[left.ColumnName(colIndex)][1].GetValue(rightRow)) + newRow++ + } + } + + (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) + } +} + +func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, + newNumRows, uniquesLeft int, commonCols map[string][]Buffer, + commonRows struct{ l, r []int }) { + var leftRow, commonRow, rightCol int + + for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { + leftRow = 0 + commonRow = 0 + for commonCols[right.ColumnName(rightCol)] != nil { + rightCol++ + } + buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) + + switch right.ColumnType(rightCol) { + case Int64: + data := array.NewInt64Data(right.Column(rightCol).Data()) + + // Fill common rows from right bow + for newRow := 0; newRow < newNumRows; newRow++ { + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(commonRows.r[commonRow]) { + buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + leftRow++ + } + + // Fill remaining rows from right bow + newRow := left.NumRows() + len(commonRows.r) - uniquesLeft + for rightRow := 0; rightRow < right.NumRows(); rightRow++ { + var isRowCommon bool + for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { + if rightRow == commonRows.r[commonRow] { + isRowCommon = true + break + } + } + if !isRowCommon { + if data.IsValid(rightRow) { + buf.SetOrDropStrict(newRow, data.Value(rightRow)) + } + newRow++ + } + } + case Float64: + data := array.NewFloat64Data(right.Column(rightCol).Data()) + + // Fill common rows from right bow + for newRow := 0; newRow < newNumRows; newRow++ { + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(commonRows.r[commonRow]) { + buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + leftRow++ + } + + // Fill remaining rows from right bow + newRow := left.NumRows() + len(commonRows.r) - uniquesLeft + for rightRow := 0; rightRow < right.NumRows(); rightRow++ { + var isRowCommon bool + for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { + if rightRow == commonRows.r[commonRow] { + isRowCommon = true + break + } + } + if !isRowCommon { + if data.IsValid(rightRow) { + buf.SetOrDropStrict(newRow, data.Value(rightRow)) + } + newRow++ + } + } + case Boolean: + data := array.NewBooleanData(right.Column(rightCol).Data()) + + // Fill common rows from right bow + for newRow := 0; newRow < newNumRows; newRow++ { + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(commonRows.r[commonRow]) { + buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + leftRow++ + } + + // Fill remaining rows from right bow + newRow := left.NumRows() + len(commonRows.r) - uniquesLeft + for rightRow := 0; rightRow < right.NumRows(); rightRow++ { + var isRowCommon bool + for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { + if rightRow == commonRows.r[commonRow] { + isRowCommon = true + break + } + } + if !isRowCommon { + if data.IsValid(rightRow) { + buf.SetOrDropStrict(newRow, data.Value(rightRow)) + } + newRow++ + } + } + case String: + data := array.NewStringData(right.Column(rightCol).Data()) + + // Fill common rows from right bow + for newRow := 0; newRow < newNumRows; newRow++ { + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(commonRows.r[commonRow]) { + buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + leftRow++ + } + + // Fill remaining rows from right bow + newRow := left.NumRows() + len(commonRows.r) - uniquesLeft + for rightRow := 0; rightRow < right.NumRows(); rightRow++ { + var isRowCommon bool + for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { + if rightRow == commonRows.r[commonRow] { + isRowCommon = true + break + } + } + if !isRowCommon { + if data.IsValid(rightRow) { + buf.SetOrDropStrict(newRow, data.Value(rightRow)) + } + newRow++ + } + } + default: + panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + } + (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) + rightCol++ + } +} diff --git a/bowmetadata.go b/bowmetadata.go index ac0b18e..28cb52d 100644 --- a/bowmetadata.go +++ b/bowmetadata.go @@ -3,8 +3,8 @@ package bow import ( "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" ) // Metadata is an arrow metadata wrapping diff --git a/bowparquet.go b/bowparquet.go index 7e10a9b..d2a8d21 100644 --- a/bowparquet.go +++ b/bowparquet.go @@ -1,152 +1,68 @@ package bow import ( - "encoding/json" + "context" "errors" "fmt" + "os" "strings" "time" - "github.com/xitongsys/parquet-go-source/local" - "github.com/xitongsys/parquet-go/layout" - "github.com/xitongsys/parquet-go/marshal" - "github.com/xitongsys/parquet-go/parquet" - "github.com/xitongsys/parquet-go/reader" - "github.com/xitongsys/parquet-go/schema" - "github.com/xitongsys/parquet-go/source" - "github.com/xitongsys/parquet-go/writer" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/memory" + "github.com/apache/arrow/go/v8/parquet" + "github.com/apache/arrow/go/v8/parquet/compress" + "github.com/apache/arrow/go/v8/parquet/file" + "github.com/apache/arrow/go/v8/parquet/pqarrow" ) -var mapParquetToBowTypes = map[parquet.Type]Type{ - parquet.Type_BOOLEAN: Boolean, - parquet.Type_INT64: Int64, - parquet.Type_DOUBLE: Float64, - parquet.Type_BYTE_ARRAY: String, -} - -var mapBowToParquetTypes = map[Type]parquet.Type{ - Boolean: parquet.Type_BOOLEAN, - Int64: parquet.Type_INT64, - Float64: parquet.Type_DOUBLE, - String: parquet.Type_BYTE_ARRAY, -} - const keyParquetColTypesMeta = "col_types" -type parquetColTypesMeta struct { - Name string `json:"name"` - LogicalType *parquet.LogicalType `json:"logical_type"` -} - // NewBowFromParquet loads a parquet object from the file path, returning a new Bow // Only value columns are used to create the new Bow. // Argument verbose is used to print information about the file loaded. -func NewBowFromParquet(path string, verbose bool) (Bow, error) { - fr, err := local.NewLocalFileReader(path) +func NewBowFromParquet(filename string, verbose bool) (Bow, error) { + rdr, err := file.OpenParquetFile(filename, false) if err != nil { - return nil, fmt.Errorf("local.NewLocalFileReader: %w", err) - } - - pr := new(reader.ParquetReader) - pr.NP = 4 - pr.PFile = fr - if err = pr.ReadFooter(); err != nil { - return nil, fmt.Errorf("reader.ParquetReader.ReadFooter: %w", err) - } - pr.ColumnBuffers = make(map[string]*reader.ColumnBufferType) - pr.SchemaHandler = schema.NewSchemaHandlerFromSchemaList(pr.Footer.GetSchema()) - - var originalColNames = make([]string, len(pr.Footer.GetSchema())) - for i, se := range pr.Footer.GetSchema() { - originalColNames[i] = se.Name + return nil, fmt.Errorf("file.OpenParquetFile: %w", err) } + defer rdr.Close() - var originalRowGroups = make([]*parquet.RowGroup, len(pr.Footer.RowGroups)) - for r, rg := range pr.Footer.RowGroups { - var originalCols = make([]*parquet.ColumnChunk, len(rg.Columns)) - for c, col := range rg.Columns { - var originalMetaData = parquet.ColumnMetaData{ - PathInSchema: col.MetaData.PathInSchema, - } - var originalCol = parquet.ColumnChunk{ - MetaData: &originalMetaData, - } - originalCols[c] = &originalCol - } - originalRowGroups[r] = &parquet.RowGroup{Columns: originalCols} - } - - pr.RenameSchema() - - var valueColIndex int64 - var series = make([]Series, pr.SchemaHandler.GetColumnNum()) - var parquetColTypesMetas []parquetColTypesMeta - for colIndex, col := range pr.Footer.GetSchema() { - if col.NumChildren != nil { - continue - } - - if col.ConvertedType != nil || col.LogicalType != nil { - parquetColTypesMetas = append(parquetColTypesMetas, parquetColTypesMeta{ - Name: originalColNames[colIndex], - LogicalType: col.LogicalType, - }) - } - - values, _, _, err := pr.ReadColumnByIndex(valueColIndex, pr.GetNumRows()) - if err != nil { - return nil, fmt.Errorf("reader.ParquetReader.ReadColumnByIndex: %w", err) - } - - bowType := mapParquetToBowTypes[col.GetType()] - buf := NewBuffer(len(values), bowType) - for i, v := range values { - buf.SetOrDrop(i, v) - } - series[valueColIndex] = NewSeriesFromBuffer(originalColNames[colIndex], buf) - - pr.Footer.Schema[colIndex].Name = originalColNames[colIndex] - valueColIndex++ + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + arrowRdr, err := pqarrow.NewFileReader(rdr, pqarrow.ArrowReadProperties{}, mem) + if err != nil { + return nil, fmt.Errorf("pqarrow.NewFileReader: %w", err) } - for r, rg := range pr.Footer.RowGroups { - for c := range rg.Columns { - pr.Footer.RowGroups[r].Columns[c].MetaData.PathInSchema = originalRowGroups[r]. - Columns[c].MetaData.PathInSchema - } + tbl, err := arrowRdr.ReadTable(context.Background()) + if err != nil { + return nil, fmt.Errorf("pqarrow.FileReader.ReadTable: %w", err) } + defer tbl.Release() - var keys, values []string - for _, m := range pr.Footer.KeyValueMetadata { - if m.GetKey() != "ARROW:schema" && m.GetKey() != keyParquetColTypesMeta { - keys = append(keys, m.GetKey()) - values = append(values, m.GetValue()) - } - } + //fmt.Printf("SCHEMA:%s\n", tbl.Schema().String()) + //fmt.Printf("COLS:%d\n", tbl.NumCols()) - if len(parquetColTypesMetas) > 0 { - colTypesJSON, err := json.Marshal(parquetColTypesMetas) - if err != nil { - return nil, fmt.Errorf("json.Marshal: %w", err) + cols := make([]arrow.Array, tbl.NumCols()) + for i := 0; i < int(tbl.NumCols()); i++ { + if len(tbl.Column(i).Data().Chunks()) != 1 { + return nil, fmt.Errorf("column %d has %d chunks", i, len(tbl.Column(i).Data().Chunks())) } - keys = append(keys, keyParquetColTypesMeta) - values = append(values, string(colTypesJSON)) - } - - b, err := NewBowWithMetadata(NewMetadata(keys, values), series...) - if err != nil { - return nil, fmt.Errorf("NewBowWithMetadata: %w", err) + //fmt.Printf("FIELD %d\n%+v\n", i, tbl.Schema().Field(i)) + cols[i] = tbl.Column(i).Data().Chunk(0) } - footerIndented, err := json.MarshalIndent(pr.Footer, "", "\t") + rec := array.NewRecord(tbl.Schema(), cols, tbl.NumRows()) + b, err := NewBowFromRecord(rec) if err != nil { - return nil, fmt.Errorf("json.MarshalIndent: %w", err) + return nil, err } if verbose { fmt.Printf( - "bow.NewBowFromParquet: %s successfully read: %d rows\n%+v\n%+v\n", - path, b.NumRows(), b.Schema().String(), string(footerIndented)) + "bow.NewBowFromParquet: %s successfully read: %d rows\n%+v\n", + filename, b.NumRows(), b.Schema()) } return b, nil @@ -154,100 +70,38 @@ func NewBowFromParquet(path string, verbose bool) (Bow, error) { // WriteParquet writes a Bow to the binary parquet format. // Argument verbose is used to print information about the written file. -func (b *bow) WriteParquet(path string, verbose bool) error { +func (b *bow) WriteParquet(filename string, verbose bool) error { if b.NumCols() == 0 { return fmt.Errorf("bow has 0 columns") } - if !strings.HasSuffix(path, ".parquet") { - path += ".parquet" - } - - var parquetColTypesMetas []parquetColTypesMeta - keyIndex := b.Metadata().FindKey(keyParquetColTypesMeta) - if keyIndex != -1 { - var err error - parquetColTypesMetas, err = readColTypesMeta(b, b.Metadata().Values()[keyIndex]) - if err != nil { - return fmt.Errorf("readColTypesMeta: %w", err) - } - } - - sElem := parquet.NewSchemaElement() - requiredRepType := parquet.FieldRepetitionType_REQUIRED - sElem.RepetitionType = &requiredRepType - sElem.Name = "Schema" - numChildren := int32(b.NumCols()) - sElem.NumChildren = &numChildren - - var sElems []*parquet.SchemaElement - sElems = append(sElems, sElem) - lTypes := []*parquet.LogicalType{nil} - for i, f := range b.Schema().Fields() { - parquetType := mapBowToParquetTypes[b.ColumnType(i)] - sElem = parquet.NewSchemaElement() - sElem.Type = &parquetType - optionalRepType := parquet.FieldRepetitionType_OPTIONAL - sElem.RepetitionType = &optionalRepType - sElem.Name = f.Name - for j, t := range parquetColTypesMetas { - if t.Name == f.Name { - sElem.LogicalType = parquetColTypesMetas[j].LogicalType - } - } - sElems = append(sElems, sElem) - lTypes = append(lTypes, sElem.LogicalType) + if !strings.HasSuffix(filename, ".parquet") { + filename += ".parquet" } - parquetFile, err := local.NewLocalFileWriter(path) + f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm) if err != nil { - return fmt.Errorf("local.NewLocalFileWriter: %w", err) + return err } - defer parquetFile.Close() - - parquetWriter, err := newJSONWriter(sElems, parquetFile, 4) + allocator := memory.NewCheckedAllocator(memory.DefaultAllocator) + wr, err := pqarrow.NewFileWriter(b.Schema(), f, + parquet.NewWriterProperties(parquet.WithCompression(compress.Codecs.Snappy)), + pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(allocator))) if err != nil { - return fmt.Errorf("newJSONWriter: %w", err) + return err } - for k, key := range b.Metadata().Keys() { - if key != "ARROW:schema" { - parquetWriter.Footer.KeyValueMetadata = append(parquetWriter.Footer.KeyValueMetadata, - &parquet.KeyValue{ - Key: key, - Value: &b.Metadata().Values()[k], - }) - } - } - - for i, lt := range lTypes { - parquetWriter.SchemaHandler.SchemaElements[i].LogicalType = lt - } - - for row := range b.GetRowsChan() { - rowJSON, err := json.Marshal(row) - if err != nil { - return fmt.Errorf("json.Marshal: %w", err) - } - if err = parquetWriter.Write(string(rowJSON)); err != nil { - return fmt.Errorf("JSONWriter.Write: %w", err) - } - } - - if err = parquetWriter.WriteStop(); err != nil { - return fmt.Errorf("JSONWriter.WriteStop: %w", err) - } - - footerBytes, err := json.MarshalIndent(parquetWriter.Footer, "", "\t") - if err != nil { - return fmt.Errorf("json.MarshalIndent: %w", err) + if err = wr.Write(b.Record); err != nil { + return err } + wr.Close() if verbose { fmt.Printf( - "bow.WriteParquet: %s successfully written: %d rows\n%s\n", - path, parquetWriter.Footer.NumRows, string(footerBytes)) + "bow.WriteParquet: %s successfully written: %d rows\n", + filename, b.NumRows()) } + fmt.Printf("%s\n", b.Schema()) return nil } @@ -258,93 +112,10 @@ var ErrColTimeUnitNotFound = errors.New("column time unit not found in parquet m // from the bow metadata read from a parquet file. // If no time unit metadata is found, time.Duration(0) is returned along with ErrColTimeUnitNotFound. func (b *bow) GetParquetMetaColTimeUnit(colIndex int) (time.Duration, error) { - colName := b.ColumnName(colIndex) - keyIndex := b.Metadata().FindKey(keyParquetColTypesMeta) if keyIndex == -1 { return time.Duration(0), ErrColTimeUnitNotFound } - colTypesMeta, err := readColTypesMeta(b, b.Metadata().Values()[keyIndex]) - if err != nil { - return time.Duration(0), fmt.Errorf("readColTypesMeta: %w", err) - } - - for _, m := range colTypesMeta { - if m.Name == colName { - if m.LogicalType != nil && - m.LogicalType.IsSetTIMESTAMP() { - unit := m.LogicalType.TIMESTAMP.GetUnit() - switch { - case unit.IsSetMILLIS(): - return time.Millisecond, nil - case unit.IsSetMICROS(): - return time.Microsecond, nil - case unit.IsSetNANOS(): - return time.Nanosecond, nil - } - } - } - } - return time.Duration(0), ErrColTimeUnitNotFound } - -func readColTypesMeta(b Bow, jsonEncodedData string) ([]parquetColTypesMeta, error) { - var colTypesMeta []parquetColTypesMeta - if err := json.Unmarshal([]byte(jsonEncodedData), &colTypesMeta); err != nil { - return nil, fmt.Errorf("invalid column types metadata: %+v", jsonEncodedData) - } - - if len(colTypesMeta) > b.NumCols() { - return nil, fmt.Errorf("invalid column types metadata: %+v", colTypesMeta) - } - - var countByCols = make([]int, b.NumCols()) - for _, t := range colTypesMeta { - colFound := false - for i, f := range b.Schema().Fields() { - if t.Name == f.Name { - countByCols[i]++ - colFound = true - } - } - if !colFound { - return nil, fmt.Errorf("invalid column types metadata: %+v", colTypesMeta) - } - } - - for _, count := range countByCols { - if count > 1 { - return nil, fmt.Errorf("invalid column types metadata: %+v", colTypesMeta) - } - } - - return colTypesMeta, nil -} - -func newJSONWriter(se []*parquet.SchemaElement, pfile source.ParquetFile, np int64) (*writer.JSONWriter, error) { - res := new(writer.JSONWriter) - res.SchemaHandler = schema.NewSchemaHandlerFromSchemaList(se) - res.SchemaHandler.CreateInExMap() - - res.PFile = pfile - res.PageSize = 8 * 1024 //8K - res.RowGroupSize = 128 * 1024 * 1024 //128M - res.CompressionType = parquet.CompressionCodec_SNAPPY - res.PagesMapBuf = make(map[string][]*layout.Page) - res.DictRecs = make(map[string]*layout.DictRecType) - res.NP = np - res.Footer = parquet.NewFileMetaData() - res.Footer.Version = 1 - res.Footer.Schema = append(res.Footer.Schema, res.SchemaHandler.SchemaElements...) - res.Offset = 4 - res.MarshalFunc = marshal.MarshalJSON - - _, err := res.PFile.Write([]byte("PAR1")) - if err != nil { - return nil, err - } - - return res, nil -} diff --git a/bowparquet_test.go b/bowparquet_test.go index c4267f6..abcd060 100644 --- a/bowparquet_test.go +++ b/bowparquet_test.go @@ -7,10 +7,9 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v7/arrow" + "github.com/apache/arrow/go/v8/arrow" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/xitongsys/parquet-go/parquet" ) const ( @@ -20,12 +19,12 @@ const ( func TestParquet(t *testing.T) { t.Run("read/write input file", func(t *testing.T) { - bBefore, err := NewBowFromParquet(testInputFileName, false) + bBefore, err := NewBowFromParquet(testInputFileName, true) assert.NoError(t, err) - assert.NoError(t, bBefore.WriteParquet(testOutputFileName, false)) + assert.NoError(t, bBefore.WriteParquet(testOutputFileName, true)) - bAfter, err := NewBowFromParquet(testOutputFileName+".parquet", false) + bAfter, err := NewBowFromParquet(testOutputFileName+".parquet", true) assert.NoError(t, err) assert.Equal(t, bBefore.String(), bAfter.String()) @@ -45,11 +44,13 @@ func TestParquet(t *testing.T) { }) require.NoError(t, err) - assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_withrows", false)) + assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_withrows", true)) - bAfter, err := NewBowFromParquet(testOutputFileName+"_withrows.parquet", false) + bAfter, err := NewBowFromParquet(testOutputFileName+"_withrows.parquet", true) assert.NoError(t, err) + fmt.Printf("bBefore\n%s\n", bBefore) + fmt.Printf("bAfter\n%s\n", bAfter) assert.Equal(t, bBefore.String(), bAfter.String()) require.NoError(t, os.Remove(testOutputFileName+"_withrows.parquet")) @@ -62,9 +63,9 @@ func TestParquet(t *testing.T) { [][]interface{}{}) require.NoError(t, err) - assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_norows", false)) + assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_norows", true)) - bAfter, err := NewBowFromParquet(testOutputFileName+"_norows.parquet", false) + bAfter, err := NewBowFromParquet(testOutputFileName+"_norows.parquet", true) assert.NoError(t, err) assert.Equal(t, bBefore.String(), bAfter.String()) @@ -111,10 +112,10 @@ func TestParquet(t *testing.T) { series...) require.NoError(t, err) - err = bBefore.WriteParquet(testOutputFileName+"_meta", false) + err = bBefore.WriteParquet(testOutputFileName+"_meta", true) assert.NoError(t, err) - bAfter, err := NewBowFromParquet(testOutputFileName+"_meta.parquet", false) + bAfter, err := NewBowFromParquet(testOutputFileName+"_meta.parquet", true) assert.NoError(t, err) assert.Equal(t, bBefore.String(), bAfter.String()) @@ -202,29 +203,7 @@ func TestBowGetParquetMetaColTimeUnit(t *testing.T) { } func newMetaWithParquetTimestampCol(keys, values []string, colName string, timeUnit time.Duration) Metadata { - var colTypes = make([]parquetColTypesMeta, 1) - - unit := parquet.TimeUnit{} - switch timeUnit { - case time.Millisecond: - unit.MILLIS = &parquet.MilliSeconds{} - case time.Microsecond: - unit.MICROS = &parquet.MicroSeconds{} - case time.Nanosecond: - unit.NANOS = &parquet.NanoSeconds{} - default: - panic(fmt.Errorf("unsupported time unit '%s'", timeUnit)) - } - - logicalType := parquet.LogicalType{ - TIMESTAMP: &parquet.TimestampType{ - IsAdjustedToUTC: true, - Unit: &unit, - }} - colTypes[0] = parquetColTypesMeta{ - Name: colName, - LogicalType: &logicalType, - } + colTypes := "" colTypesJSON, err := json.Marshal(colTypes) if err != nil { diff --git a/bowrecord.go b/bowrecord.go index 188a8ec..3390e3e 100644 --- a/bowrecord.go +++ b/bowrecord.go @@ -4,13 +4,13 @@ import ( "errors" "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" ) func NewBowFromRecord(record arrow.Record) (Bow, error) { for _, f := range record.Schema().Fields() { - if getBowTypeFromArrowType(f.Type) == Unknown { + if getBowTypeFromArrowTypeFingerprint(f.Type) == Unknown { return nil, fmt.Errorf("unsupported type: %s", f.Type.Name()) } } @@ -42,7 +42,11 @@ func newRecord(metadata Metadata, series ...Series) (arrow.Record, error) { "bow.Series '%s' has a length of %d, which is different from the previous ones", s.Name, s.Array.Len()) } - fields = append(fields, arrow.Field{Name: s.Name, Type: s.Array.DataType()}) + fields = append(fields, arrow.Field{ + Name: s.Name, + Type: s.Array.DataType(), + Nullable: true, + }) arrays = append(arrays, s.Array) } diff --git a/bowseries.gen.go b/bowseries.gen.go deleted file mode 100644 index f670ec0..0000000 --- a/bowseries.gen.go +++ /dev/null @@ -1,151 +0,0 @@ -// Code generated by bowseries.gen.go.tmpl. DO NOT EDIT. - -package bow - -import ( - "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/bitutil" - "github.com/apache/arrow/go/v7/arrow/memory" -) - -func NewSeries(name string, dataArray interface{}, validityArray interface{}) Series { - switch v := dataArray.(type) { - case []int64: - return newInt64Series(name, v, buildNullBitmapBytes(len(v), validityArray)) - case []float64: - return newFloat64Series(name, v, buildNullBitmapBytes(len(v), validityArray)) - case []bool: - return newBooleanSeries(name, v, buildNullBitmapBytes(len(v), validityArray)) - case []string: - return newStringSeries(name, v, buildNullBitmapBytes(len(v), validityArray)) - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -func NewSeriesFromBuffer(name string, buf Buffer) Series { - switch data := buf.Data.(type) { - case []int64: - return newInt64Series(name, data, buf.nullBitmapBytes) - case []float64: - return newFloat64Series(name, data, buf.nullBitmapBytes) - case []bool: - return newBooleanSeries(name, data, buf.nullBitmapBytes) - case []string: - return newStringSeries(name, data, buf.nullBitmapBytes) - default: - panic(fmt.Errorf("unsupported type '%T'", buf.Data)) - } -} - -func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series { - if typ == Unknown { - var err error - if typ, err = seekType(cells); err != nil { - panic(err) - } - } - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - switch typ { - case Int64: - builder := array.NewInt64Builder(mem) - defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToInt64(cells[i]) - if !ok { - builder.AppendNull() - continue - } - builder.Append(v) - } - return Series{Name: name, Array: builder.NewArray()} - case Float64: - builder := array.NewFloat64Builder(mem) - defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToFloat64(cells[i]) - if !ok { - builder.AppendNull() - continue - } - builder.Append(v) - } - return Series{Name: name, Array: builder.NewArray()} - case Boolean: - builder := array.NewBooleanBuilder(mem) - defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToBoolean(cells[i]) - if !ok { - builder.AppendNull() - continue - } - builder.Append(v) - } - return Series{Name: name, Array: builder.NewArray()} - case String: - builder := array.NewStringBuilder(mem) - defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToString(cells[i]) - if !ok { - builder.AppendNull() - continue - } - builder.Append(v) - } - return Series{Name: name, Array: builder.NewArray()} - default: - panic(fmt.Errorf("unhandled type %s", typ)) - } -} - -func newInt64Series(name string, data []int64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewInt64Data( - array.NewData(mapBowToArrowTypes[Int64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), - } -} - -func newFloat64Series(name string, data []float64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewFloat64Data( - array.NewData(mapBowToArrowTypes[Float64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), - } -} - -func newBooleanSeries(name string, data []bool, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewBooleanBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} -} - -func newStringSeries(name string, data []string, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewStringBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} -} diff --git a/bowseries.gen.go.tmpl b/bowseries.gen.go.tmpl deleted file mode 100644 index c6496b3..0000000 --- a/bowseries.gen.go.tmpl +++ /dev/null @@ -1,85 +0,0 @@ -package bow - -import ( - "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/bitutil" - "github.com/apache/arrow/go/v7/arrow/memory" -) - -func NewSeries(name string, dataArray interface{}, validityArray interface{}) Series { - switch v := dataArray.(type) { - {{range .Data.types -}} - case []{{ .type }}: - return new{{ .Type }}Series(name, v, buildNullBitmapBytes(len(v), validityArray)) - {{end -}} - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -func NewSeriesFromBuffer(name string, buf Buffer) Series { - switch data := buf.Data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - return new{{ .Type }}Series(name, data, buf.nullBitmapBytes) - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%T'", buf.Data)) - } -} - -func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series { - if typ == Unknown { - var err error - if typ, err = seekType(cells); err != nil { - panic(err) - } - } - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - switch typ { - {{range .Data.types -}} - case {{ .Type }}: - builder := array.New{{ .Type }}Builder(mem) - defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := To{{ .Type }}(cells[i]) - if !ok { - builder.AppendNull() - continue - } - builder.Append(v) - } - return Series{Name: name, Array: builder.NewArray()} - {{end -}} - default: - panic(fmt.Errorf("unhandled type %s", typ)) - } -} - -{{range .Data.types -}} -func new{{ .Type }}Series(name string, data []{{ .type }}, valid []byte) Series { - {{ if .bufferMemoryMappingEqualArrow -}} - length := len(data) - return Series{ - Name: name, - Array: array.New{{ .Type }}Data( - array.NewData(mapBowToArrowTypes[{{ .Type }}], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.{{ .Type }}Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), - } - {{ else -}} - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.New{{ .Type }}Builder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} - {{ end -}} -} - -{{end -}} \ No newline at end of file diff --git a/bowseries.gen_test.go b/bowseries.gen_test.go deleted file mode 100644 index f130914..0000000 --- a/bowseries.gen_test.go +++ /dev/null @@ -1,20 +0,0 @@ -package bow - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestNewSeriesFromInterfaces(t *testing.T) { - for _, typ := range allType { - t.Run(typ.String(), func(t *testing.T) { - testcase := []interface{}{typ.Convert(0), nil} - res, err := NewBow(NewSeriesFromInterfaces(typ.String(), typ, testcase)) - require.NoError(t, err) - assert.Equal(t, typ.Convert(0), res.GetValue(0, 0)) - assert.Equal(t, nil, res.GetValue(0, 1)) - }) - } -} diff --git a/bowseries.go b/bowseries.go index c40f89d..50b0fde 100644 --- a/bowseries.go +++ b/bowseries.go @@ -5,7 +5,9 @@ import ( "fmt" "github.com/apache/arrow/go/arrow/bitutil" - "github.com/apache/arrow/go/v7/arrow" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/memory" ) // A Series is simply a named Apache Arrow array.Interface, which is immutable @@ -14,6 +16,146 @@ type Series struct { Array arrow.Array } +func NewSeries(name string, dataArray interface{}, validityArray interface{}) Series { + switch v := dataArray.(type) { + case []int64: + return newInt64Series(name, v, buildNullBitmapBytes(len(v), validityArray)) + case []float64: + return newFloat64Series(name, v, buildNullBitmapBytes(len(v), validityArray)) + case []bool: + return newBooleanSeries(name, v, buildNullBitmapBytes(len(v), validityArray)) + case []string: + return newStringSeries(name, v, buildNullBitmapBytes(len(v), validityArray)) + default: + panic(fmt.Errorf("unsupported type %T", v)) + } +} + +func NewSeriesFromBuffer(name string, buf Buffer) Series { + switch data := buf.Data.(type) { + case []int64: + return newInt64Series(name, data, buf.nullBitmapBytes) + case []float64: + return newFloat64Series(name, data, buf.nullBitmapBytes) + case []bool: + return newBooleanSeries(name, data, buf.nullBitmapBytes) + case []string: + return newStringSeries(name, data, buf.nullBitmapBytes) + default: + panic(fmt.Errorf("unsupported type '%T'", buf.Data)) + } +} + +func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series { + if typ == Unknown { + var err error + if typ, err = seekType(cells); err != nil { + panic(err) + } + } + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + switch typ { + case Int64: + builder := array.NewInt64Builder(mem) + defer builder.Release() + builder.Resize(len(cells)) + for i := 0; i < len(cells); i++ { + v, ok := ToInt64(cells[i]) + if !ok { + builder.AppendNull() + continue + } + builder.Append(v) + } + return Series{Name: name, Array: builder.NewArray()} + case Float64: + builder := array.NewFloat64Builder(mem) + defer builder.Release() + builder.Resize(len(cells)) + for i := 0; i < len(cells); i++ { + v, ok := ToFloat64(cells[i]) + if !ok { + builder.AppendNull() + continue + } + builder.Append(v) + } + return Series{Name: name, Array: builder.NewArray()} + case Boolean: + builder := array.NewBooleanBuilder(mem) + defer builder.Release() + builder.Resize(len(cells)) + for i := 0; i < len(cells); i++ { + v, ok := ToBoolean(cells[i]) + if !ok { + builder.AppendNull() + continue + } + builder.Append(v) + } + return Series{Name: name, Array: builder.NewArray()} + case String: + builder := array.NewStringBuilder(mem) + defer builder.Release() + builder.Resize(len(cells)) + for i := 0; i < len(cells); i++ { + v, ok := ToString(cells[i]) + if !ok { + builder.AppendNull() + continue + } + builder.Append(v) + } + return Series{Name: name, Array: builder.NewArray()} + default: + panic(fmt.Errorf("unhandled type %s", typ)) + } +} + +func newInt64Series(name string, data []int64, valid []byte) Series { + length := len(data) + return Series{ + Name: name, + Array: array.NewInt64Data( + array.NewData(mapBowToArrowTypes[Int64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(data)), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), + } +} + +func newFloat64Series(name string, data []float64, valid []byte) Series { + length := len(data) + return Series{ + Name: name, + Array: array.NewFloat64Data( + array.NewData(mapBowToArrowTypes[Float64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(data)), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), + } +} + +func newBooleanSeries(name string, data []bool, valid []byte) Series { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewBooleanBuilder(mem) + defer builder.Release() + builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) + return Series{Name: name, Array: builder.NewArray()} +} + +func newStringSeries(name string, data []string, valid []byte) Series { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewStringBuilder(mem) + defer builder.Release() + builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) + return Series{Name: name, Array: builder.NewArray()} +} + func buildNullBitmapBool(dataLength int, validityArray interface{}) []bool { switch valid := validityArray.(type) { case nil: diff --git a/bowseries_test.go b/bowseries_test.go index 98eab58..801a856 100644 --- a/bowseries_test.go +++ b/bowseries_test.go @@ -3,8 +3,23 @@ package bow import ( "fmt" "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) +func TestNewSeriesFromInterfaces(t *testing.T) { + for _, typ := range allType { + t.Run(typ.String(), func(t *testing.T) { + testcase := []interface{}{typ.Convert(0), nil} + res, err := NewBow(NewSeriesFromInterfaces(typ.String(), typ, testcase)) + require.NoError(t, err) + assert.Equal(t, typ.Convert(0), res.GetValue(0, 0)) + assert.Equal(t, nil, res.GetValue(0, 1)) + }) + } +} + func BenchmarkNewSeries(b *testing.B) { for rows := 10; rows <= 100000; rows *= 10 { dataArray := make([]int64, rows) diff --git a/bowstring.go b/bowstring.go index 621a3c5..c635fa9 100644 --- a/bowstring.go +++ b/bowstring.go @@ -29,7 +29,7 @@ func (b *bow) String() string { // Print col names on buffer formatRow(func(colIndex int) string { - return fmt.Sprintf("%s:%v", b.Schema().Field(colIndex).Name, b.ColumnType(colIndex)) + return fmt.Sprintf("%s:%v %s", b.Schema().Field(colIndex).Name, b.ColumnType(colIndex), b.Schema().Field(colIndex).Fingerprint()) }) // Print each row on buffer diff --git a/bowtypes.go b/bowtypes.go index bfb0362..42312bc 100644 --- a/bowtypes.go +++ b/bowtypes.go @@ -1,7 +1,7 @@ package bow import ( - "github.com/apache/arrow/go/v7/arrow" + "github.com/apache/arrow/go/v8/arrow" ) type Type int @@ -22,6 +22,10 @@ const ( Int64 Boolean String + TimestampSec + TimestampMilli + TimestampMicro + TimestampNano // InputDependent is used in transformation like aggregation // when output type is infer with input type @@ -34,10 +38,24 @@ const ( var ( mapArrowToBowTypes = map[arrow.DataType]Type{ - arrow.PrimitiveTypes.Float64: Float64, - arrow.PrimitiveTypes.Int64: Int64, - arrow.FixedWidthTypes.Boolean: Boolean, - arrow.BinaryTypes.String: String, + arrow.PrimitiveTypes.Float64: Float64, + arrow.PrimitiveTypes.Int64: Int64, + arrow.FixedWidthTypes.Boolean: Boolean, + arrow.BinaryTypes.String: String, + arrow.FixedWidthTypes.Timestamp_s: TimestampSec, + arrow.FixedWidthTypes.Timestamp_ms: TimestampMilli, + arrow.FixedWidthTypes.Timestamp_us: TimestampMicro, + arrow.FixedWidthTypes.Timestamp_ns: TimestampNano, + } + mapArrowFingerprintToBowTypes = map[string]Type{ + arrow.PrimitiveTypes.Float64.Fingerprint(): Float64, + arrow.PrimitiveTypes.Int64.Fingerprint(): Int64, + arrow.FixedWidthTypes.Boolean.Fingerprint(): Boolean, + arrow.BinaryTypes.String.Fingerprint(): String, + arrow.FixedWidthTypes.Timestamp_s.Fingerprint(): TimestampSec, + arrow.FixedWidthTypes.Timestamp_ms.Fingerprint(): TimestampMilli, + arrow.FixedWidthTypes.Timestamp_us.Fingerprint(): TimestampMicro, + arrow.FixedWidthTypes.Timestamp_ns.Fingerprint(): TimestampNano, } mapBowToArrowTypes = func() map[Type]arrow.DataType { res := make(map[Type]arrow.DataType) @@ -116,6 +134,14 @@ func getBowTypeFromArrowType(arrowType arrow.DataType) Type { return typ } +func getBowTypeFromArrowTypeFingerprint(arrowType arrow.DataType) Type { + typ, ok := mapArrowFingerprintToBowTypes[arrowType.Fingerprint()] + if !ok { + return Unknown + } + return typ +} + func GetAllTypes() []Type { res := make([]Type, len(allType)) copy(res, allType) diff --git a/bowvalues.go b/bowvalues.go index e75edd9..baf7a4d 100644 --- a/bowvalues.go +++ b/bowvalues.go @@ -1,6 +1,6 @@ package bow -import "github.com/apache/arrow/go/v7/arrow/array" +import "github.com/apache/arrow/go/v8/arrow/array" func Int64Values(arr *array.Int64) []int64 { return arr.Int64Values() diff --git a/datatypes.yml b/datatypes.yml deleted file mode 100644 index 8bde5b8..0000000 --- a/datatypes.yml +++ /dev/null @@ -1,16 +0,0 @@ -types: - - Type: Int64 - type: int64 - comparable: true - bufferMemoryMappingEqualArrow: true - - Type: Float64 - type: float64 - comparable: true - bufferMemoryMappingEqualArrow: true - - Type: Boolean - type: bool - bufferMemoryMappingEqualArrow: false - - Type: String - type: string - comparable: true - bufferMemoryMappingEqualArrow: false diff --git a/doc.go b/doc.go deleted file mode 100644 index 4cc7dd8..0000000 --- a/doc.go +++ /dev/null @@ -1,6 +0,0 @@ -package bow - -//go:generate genius tmpl -d datatypes.yml -p ./ bowbuffer.gen.go.tmpl -//go:generate genius tmpl -d datatypes.yml -p ./ bowseries.gen.go.tmpl -//go:generate genius tmpl -d datatypes.yml -p ./ bowappend.gen.go.tmpl -//go:generate genius tmpl -d datatypes.yml -p ./ bowjoin.gen.go.tmpl diff --git a/go.mod b/go.mod index 023343d..887649f 100644 --- a/go.mod +++ b/go.mod @@ -4,25 +4,38 @@ go 1.17 require ( github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 - github.com/apache/arrow/go/v7 v7.0.0-20220201131309-56e270fda7f5 - github.com/apache/thrift v0.15.0 // indirect + github.com/apache/arrow/go/v8 v8.0.0-20220407025312-96de9d584208 github.com/davecgh/go-spew v1.1.1 // indirect github.com/google/uuid v1.3.0 github.com/pmezard/go-difflib v1.0.0 // indirect github.com/stretchr/testify v1.7.0 - github.com/xitongsys/parquet-go v1.6.2 - github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect ) require ( + github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect + github.com/andybalholm/brotli v1.0.4 // indirect + github.com/apache/thrift v0.15.0 // indirect github.com/goccy/go-json v0.9.4 // indirect + github.com/golang/protobuf v1.5.2 // indirect github.com/golang/snappy v0.0.4 // indirect + github.com/google/flatbuffers v2.0.5+incompatible // indirect + github.com/klauspost/asmfmt v1.3.1 // indirect github.com/klauspost/compress v1.14.2 // indirect + github.com/klauspost/cpuid/v2 v2.0.9 // indirect + github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect + github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect github.com/pierrec/lz4/v4 v4.1.12 // indirect - golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57 // indirect - golang.org/x/sys v0.0.0-20220209214540-3681064d5158 // indirect - golang.org/x/tools v0.1.9 // indirect + github.com/zeebo/xxh3 v1.0.1 // indirect + golang.org/x/exp v0.0.0-20211216164055-b2b84827b756 // indirect + golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // indirect + golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd // indirect + golang.org/x/sys v0.0.0-20220406163625-3f8b81556e12 // indirect + golang.org/x/text v0.3.7 // indirect + golang.org/x/tools v0.1.10 // indirect + google.golang.org/genproto v0.0.0-20220126215142-9970aeb2e350 // indirect + google.golang.org/grpc v1.44.0 // indirect + google.golang.org/protobuf v1.27.1 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect ) diff --git a/go.sum b/go.sum index 6f594b7..c1cae21 100644 --- a/go.sum +++ b/go.sum @@ -1,29 +1,11 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= -cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= -cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= -cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= -cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= -cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= -cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= -cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= -cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= -cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= -cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= -cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= -cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= -cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= -cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= -cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= -cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= -cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= dmitri.shuralyov.com/gpu/mtl v0.0.0-20201218220906-28db891af037/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= @@ -36,16 +18,15 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 h1:byKBBF2CKWBjjA4J1ZL2JXttJULvWSl50LegTyRZ728= github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516/go.mod h1:QNYViu/X0HXDHw7m3KXzWSVXIbfUvJqBFe6Gj8/pYA0= -github.com/apache/arrow/go/v7 v7.0.0-20220201131309-56e270fda7f5 h1:YFoQL6S5yYC6uHoScA5xUlRANdTAlHWT3YTL+Yfg/GY= -github.com/apache/arrow/go/v7 v7.0.0-20220201131309-56e270fda7f5/go.mod h1:EMQu9P1iWg58RroDhuW458NFFdNG89lIPHsSsAUTg90= -github.com/apache/thrift v0.0.0-20181112125854-24918abba929/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= +github.com/apache/arrow/go/v8 v8.0.0-20220407025312-96de9d584208 h1:dkJXBzsG4aIhQLl6FEIVfniTm4qrIvkJTmyFA+UIobI= +github.com/apache/arrow/go/v8 v8.0.0-20220407025312-96de9d584208/go.mod h1:UUe+gJaMnuFD6icfGSJxUjG/tX/POUbPS/wE+EFyncM= github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/apache/thrift v0.14.2/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.15.0 h1:aGvdaR0v1t9XLgjtBYwxcBvBOTMqClzwE26CHOgjW1Y= github.com/apache/thrift v0.15.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= @@ -54,7 +35,6 @@ github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj github.com/aryann/difflib v0.0.0-20170710044230-e206f873d14a/go.mod h1:DAHtR1m6lCRdSC2Tm3DSWRPvIPr6xNKyeHdqDQSQT+A= github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQwij/eHl5CU= github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= -github.com/aws/aws-sdk-go v1.30.19/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= @@ -66,9 +46,6 @@ github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QH github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/clbanning/x2j v0.0.0-20191024224557-825249438eec/go.mod h1:jMjuTZXRI4dUb/I5gc9Hdhagfvm9+RyrPryS/auMzxE= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= @@ -80,7 +57,6 @@ github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= -github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= @@ -117,7 +93,6 @@ github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3 github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= @@ -128,8 +103,8 @@ github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= -github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/goccy/go-json v0.7.10/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/goccy/go-json v0.9.4 h1:L8MLKG2mvVXiQu07qB6hmfqeSYQdOnqPot2GhsIwIaI= github.com/goccy/go-json v0.9.4/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= @@ -140,15 +115,8 @@ github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGw github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= -github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= -github.com/golang/protobuf v1.1.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -162,9 +130,9 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= @@ -178,20 +146,14 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= -github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= -github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= @@ -211,7 +173,6 @@ github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHh github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= -github.com/hashicorp/go-uuid v0.0.0-20180228145832-27454136f036/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= @@ -224,29 +185,24 @@ github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2p github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg= -github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= -github.com/jcmturner/gofork v0.0.0-20180107083740-2aebee971930/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= -github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= -github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/asmfmt v1.3.1 h1:7xZi1N7s9gTLbqiM8KUv8TLyysavbTRGBT5/ly0bRtw= github.com/klauspost/asmfmt v1.3.1/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= -github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/klauspost/compress v1.14.2 h1:S0OHlFk/Gbon/yauFJ4FfJJF5V0fc5HbBTJazi28pRw= github.com/klauspost/compress v1.14.2/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= @@ -265,7 +221,9 @@ github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNx github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= @@ -303,7 +261,6 @@ github.com/openzipkin/zipkin-go v0.2.1/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnh github.com/openzipkin/zipkin-go v0.2.2/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIwwtUjcrb0b5/5kLM= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/pborman/getopt v0.0.0-20180729010549-6fdd0a2c7117/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= @@ -311,7 +268,6 @@ github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pierrec/lz4/v4 v4.1.12 h1:44l88ehTZAUGW4VlO1QC4zkilL99M6Y9MXNwEs0uzP8= github.com/pierrec/lz4/v4 v4.1.12/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -357,13 +313,13 @@ github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9 github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1 h1:2vfRuCMp5sSVIDSqO8oNnWJq7mPa6KVP3iPIwFBuy8A= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= @@ -376,23 +332,15 @@ github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1 github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -github.com/xitongsys/parquet-go v1.5.1/go.mod h1:xUxwM8ELydxh4edHGegYq1pA8NnMKDx0K/GyB0o2bww= -github.com/xitongsys/parquet-go v1.6.2 h1:MhCaXii4eqceKPu9BwrjLqyK10oX9WF+xGhwvwbw7xM= -github.com/xitongsys/parquet-go v1.6.2/go.mod h1:IulAQyalCm0rPiZVNnCgm/PCL64X2tdSVGMQ/UeKqWA= -github.com/xitongsys/parquet-go-source v0.0.0-20190524061010-2b72cbee77d5/go.mod h1:xxCx7Wpym/3QCo6JhujJX51dzSXrwmb0oH6FQb39SEA= -github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0 h1:a742S4V5A15F93smuVxA60LQWsrCnN8bKeWDBARU1/k= -github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0/go.mod h1:HYhIKsdns7xz80OgkbgJYrtQY7FjHWHKH6cvN7+czGE= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/zeebo/xxh3 v1.0.1 h1:FMSRIbkrLikb/0hZxmltpg84VkqDAT5M8ufXynuhXsI= github.com/zeebo/xxh3 v1.0.1/go.mod h1:8VHV24/3AZLn3b6Mlp/KuC33LWH687Wq6EnziEB+rsA= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= -go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= -go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opentelemetry.io/otel v0.20.0/go.mod h1:Y3ugLH2oa81t5QO+Lty+zXf8zC9L26ax4Nzoxm/dooo= go.opentelemetry.io/otel/metric v0.20.0/go.mod h1:598I5tYlH1vzBjn+BTuhzTCSb/9debfNp6R3s7Pr1eU= go.opentelemetry.io/otel/oteltest v0.20.0/go.mod h1:L7bgKf9ZB7qCwT9Up7i9/pn0PWIa9FqQ2IQ8LoxiGnw= @@ -411,12 +359,10 @@ go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9E go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= -golang.org/x/crypto v0.0.0-20180723164146-c126467f60eb/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -426,16 +372,8 @@ golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= golang.org/x/exp v0.0.0-20190731235908-ec7cb31e5a56/go.mod h1:JhuoJpWY28nO4Vef9tZUw9qufEGTyX1+7lmHxV5q5G4= -golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE= -golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= -golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20211216164055-b2b84827b756 h1:/5Bs7sWi0i3rOVO5KnM55OwugpsD4bRW1zywKoZjbkI= golang.org/x/exp v0.0.0-20211216164055-b2b84827b756/go.mod h1:b9TAUYHmRtqA6klRHApnXMnj+OyLce4yF5cZCUbk2ps= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= @@ -451,24 +389,19 @@ golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTk golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= -golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mobile v0.0.0-20201217150744-e6ae53a27f4f/go.mod h1:skQtrUTUwhdJvXM/2KKJzY8pDgNr9I/FOMqDVRPBUS4= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.1.1-0.20191209134235-331c550502dd/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= -golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57 h1:LQmS1nU0twXLA96Kt7U9qtHJEbBk3z6Q0V4UXjZkpr4= golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= +golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 h1:kQgndtyPBW/JIYERgdxfwMYh3AVStj88WQTlNDi2a+o= +golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -481,27 +414,19 @@ golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd h1:O7DYs+zxREGLKzKoMQrtrEacpb0ZVXA5rIwylE2Xchk= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -524,21 +449,10 @@ golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -551,21 +465,18 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220209214540-3681064d5158 h1:rm+CHSpPEEW2IsXUib1ThaHIjuBVZjxNgSKmBLFfD4c= -golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220406163625-3f8b81556e12 h1:QyVthZKMsyaQwBTJE04jdNN0Pp5Fn9Qga0mrgxyERQM= +golang.org/x/sys v0.0.0-20220406163625-3f8b81556e12/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -578,38 +489,19 @@ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200117012304-6edc0a871e69/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.8-0.20211029000441-d6a9af8af023/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= -golang.org/x/tools v0.1.9 h1:j9KsMiaP1c3B0OTQGth0/k+miLGTgLsAFUCrF2vLcF8= golang.org/x/tools v0.1.9/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= +golang.org/x/tools v0.1.10 h1:QjFRCZxdOhBJ/UNgnBZLbNV13DlbnK0quyivTnXJM20= +golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -623,58 +515,33 @@ gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6d gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY= google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= -google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= -google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= -google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= -google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190530194941-fb225487d101/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= -google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= -google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= -google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20220126215142-9970aeb2e350 h1:YxHp5zqIcAShDEvRr5/0rVESVS+njYF68PSdazrNLJo= google.golang.org/genproto v0.0.0-20220126215142-9970aeb2e350/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.22.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= +google.golang.org/grpc v1.44.0 h1:weqSxi/TMs1SqFRMHCtBgXRs8k3X39QIDEZ0pRcttUg= google.golang.org/grpc v1.44.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= @@ -687,6 +554,7 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -697,11 +565,6 @@ gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qS gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o= -gopkg.in/jcmturner/aescts.v1 v1.0.1/go.mod h1:nsR8qBOg+OucoIW+WMhB3GspUQXq9XorLnQb9XtvcOo= -gopkg.in/jcmturner/dnsutils.v1 v1.0.1/go.mod h1:m3v+5svpVOhtFAP/wSz+yzh4Mc0Fg7eRhxkJMWSIz9Q= -gopkg.in/jcmturner/goidentity.v3 v3.0.0/go.mod h1:oG2kH0IvSYNIu80dVAyu/yoefjq1mNfM5bm88whjWx4= -gopkg.in/jcmturner/gokrb5.v7 v7.3.0/go.mod h1:l8VISx+WGYp+Fp7KRbsiUuXTTOnxIc3Tuvyavf11/WM= -gopkg.in/jcmturner/rpc.v1 v1.1.0/go.mod h1:YIdkC4XfD6GXbzje11McwsDuOlZQSb9W4vfLvuNnlv8= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= @@ -713,15 +576,9 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.1.3/go.mod h1:NgwopIslSNH47DimFoV78dnkksY2EFtX0ajyb3K/las= -rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= -rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= -rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sourcegraph.com/sourcegraph/appdash v0.0.0-20190731080439-ebfcffb1b5c0/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU= From e2cdb5fdc976311468e097a86e06423dbad23e1a Mon Sep 17 00:00:00 2001 From: agelloz Date: Wed, 13 Apr 2022 10:01:42 +0200 Subject: [PATCH 02/29] first --- Makefile | 5 +++++ bow.go | 13 ++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8113222..6df28ff 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ install: @go install golang.org/x/perf/cmd/benchstat@latest @go install github.com/jstemmer/go-junit-report@latest @go install github.com/Metronlab/genius@latest + @go install golang.org/x/tools/cmd/godoc@latest gen: @go generate $(PKG) @@ -21,6 +22,10 @@ test: bench: @RUN=$(RUN) PKG=$(PKG) TIMEOUT=$(TIMEOUT) bash -c $(PWD)/scripts/benchmark.sh +doc: + godoc -http=:6060 -goroot=$(HOME)/go & + echo http://localhost:6060/pkg/bow + CPUPROFILE=/tmp/$(shell basename $(PWD))$(shell echo $(PKG) | sed 's/[^[:alnum:]\t]//g').cpu.prof MEMPROFILE=/tmp/$(shell basename $(PWD))$(shell echo $(PKG) | sed 's/[^[:alnum:]\t]//g').mem.prof diff --git a/bow.go b/bow.go index f9e755f..d287a6c 100644 --- a/bow.go +++ b/bow.go @@ -96,6 +96,7 @@ type bow struct { arrow.Record } +// NewBowEmpty returns a new empty Bow func NewBowEmpty() Bow { var fields []arrow.Field var arrays []arrow.Array @@ -103,6 +104,7 @@ func NewBowEmpty() Bow { return &bow{Record: array.NewRecord(schema, arrays, 0)} } +// NewBow returns a new Bow from one or more Series func NewBow(series ...Series) (Bow, error) { rec, err := newRecord(Metadata{}, series...) if err != nil { @@ -141,7 +143,7 @@ func NewBowFromColBasedInterfaces(colNames []string, colTypes []Type, colData [] return NewBow(seriesSlice...) } -// NewBowFromRowBasedInterfaces returns a new bow from row based data +// NewBowFromRowBasedInterfaces returns a new Bow from row based data func NewBowFromRowBasedInterfaces(colNames []string, colTypes []Type, rowBasedData [][]interface{}) (Bow, error) { if len(colNames) != len(colTypes) { return nil, errors.New( @@ -172,6 +174,7 @@ func NewBowFromRowBasedInterfaces(colNames []string, colTypes []Type, rowBasedDa return NewBow(seriesSlice...) } +// NewEmptySlice returns an empty slice of the Bow func (b *bow) NewEmptySlice() Bow { return b.NewSlice(0, 0) } @@ -214,6 +217,7 @@ func (b *bow) DropNils(colIndices ...int) (Bow, error) { return AppendBows(bowSlice...) } +// GetRowsChan returns a chan of rows as map[string]interface{} func (b *bow) GetRowsChan() <-chan map[string]interface{} { rows := make(chan map[string]interface{}) go b.getRowsChan(rows) @@ -233,6 +237,7 @@ func (b *bow) getRowsChan(rows chan map[string]interface{}) { } } +// Equal returns true if the two Bow are equal: their record, schema and metadata should be equal. func (b *bow) Equal(other Bow) bool { b2, ok := other.(*bow) if !ok { @@ -283,12 +288,15 @@ func (b *bow) Equal(other Bow) bool { return true } +// NewSlice returns a new Bow with a zero-copy slice of the Bow arrow.Record. +// i and j being the minimum and maximum rows respectively. func (b *bow) NewSlice(i, j int) Bow { return &bow{ Record: b.Record.NewSlice(int64(i), int64(j)), } } +// Select returns a copy of the Bow, including only the columns from `colIndices`. func (b *bow) Select(colIndices ...int) (Bow, error) { if len(colIndices) == 0 { return NewBowWithMetadata(b.Metadata()) @@ -309,6 +317,7 @@ func (b *bow) Select(colIndices ...int) (Bow, error) { return NewBowWithMetadata(b.Metadata(), seriesSlice...) } +// NumRows returns the number of rows in the Bow func (b *bow) NumRows() int { if b.Record == nil { return 0 @@ -317,6 +326,7 @@ func (b *bow) NumRows() int { return int(b.Record.NumRows()) } +// NumCols returns the number of columns in the Bow func (b *bow) NumCols() int { if b.Record == nil { return 0 @@ -325,6 +335,7 @@ func (b *bow) NumCols() int { return int(b.Record.NumCols()) } +// AddCols returns a copy of the Bow with extra columns from the `seriesSlice`. func (b *bow) AddCols(seriesSlice ...Series) (Bow, error) { if len(seriesSlice) == 0 { return b, nil From 7d917dbb69d43b74b952f18648a48bce01e60715 Mon Sep 17 00:00:00 2001 From: agelloz Date: Mon, 25 Apr 2022 16:31:49 +0200 Subject: [PATCH 03/29] doc --- Makefile | 6 +- XXXexamples_test.go | 51 +++- arrowtests/arrow.go | 12 +- arrowtests/arrow_test.go | 4 +- bow.go | 132 ++++----- bowappend.gen.go | 7 +- bowappend.gen.go.tmpl | 7 +- bowbuffer.gen.go | 10 + bowbuffer.gen.go.tmpl | 10 + bowbuffer.go | 14 +- bowconvert.go | 129 +++++---- bowdiff.go | 4 +- bowfill.go | 20 +- bowfind.go | 20 +- bowgenerator.go | 27 +- bowgenerator_test.go | 2 +- bowgetters.go | 163 +++++++---- bowjoin.go | 12 +- bowjoin_test.go | 8 +- bowjson.go | 35 ++- bowmetadata.go | 86 +++--- bowparquet.go | 2 +- bowrecord.go | 9 - bowseries.gen.go | 127 ++++---- bowseries.gen.go.tmpl | 71 +++-- bowseries.gen_test.go | 2 +- bowseries.go | 7 +- bowsetters.go | 42 +-- bowsort.go | 10 +- bowstring.go | 4 +- bowtypes.go | 33 ++- rolling/aggregation.go | 258 ++++++++--------- rolling/aggregation/XXXbenchmarks_test.go | 16 +- rolling/aggregation/core_test.go | 6 +- rolling/aggregation/integral.go | 2 +- rolling/aggregation/integral_test.go | 4 +- rolling/aggregation/weightedmean.go | 4 +- rolling/aggregation/whole.go | 93 ++++++ ...wholeaggregation_test.go => whole_test.go} | 12 +- rolling/aggregation/wholeaggregation.go | 114 -------- rolling/aggregation/windowstart.go | 2 +- rolling/aggregation_test.go | 58 ++-- rolling/interpolation.go | 125 ++++---- rolling/interpolation/linear.go | 2 +- rolling/interpolation/linear_test.go | 4 +- rolling/interpolation/windowstart.go | 2 +- rolling/interpolation_test.go | 6 +- rolling/rolling.go | 274 +++++++++--------- rolling/rolling_test.go | 263 +++++++---------- .../transformation}/factor.go | 6 +- .../transformation}/factor_test.go | 2 +- rolling/window.go | 23 +- 52 files changed, 1199 insertions(+), 1143 deletions(-) create mode 100644 rolling/aggregation/whole.go rename rolling/aggregation/{wholeaggregation_test.go => whole_test.go} (95%) delete mode 100644 rolling/aggregation/wholeaggregation.go rename {transform => rolling/transformation}/factor.go (72%) rename {transform => rolling/transformation}/factor_test.go (96%) diff --git a/Makefile b/Makefile index 6df28ff..6739073 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ gen: @go generate $(PKG) lint: - golangci-lint run -E gofmt --fix -v $(PKG) + golangci-lint run -E gofmt,gci --fix -v $(PKG) count: @bash -c $(PWD)/scripts/count-code-lines.sh @@ -22,10 +22,6 @@ test: bench: @RUN=$(RUN) PKG=$(PKG) TIMEOUT=$(TIMEOUT) bash -c $(PWD)/scripts/benchmark.sh -doc: - godoc -http=:6060 -goroot=$(HOME)/go & - echo http://localhost:6060/pkg/bow - CPUPROFILE=/tmp/$(shell basename $(PWD))$(shell echo $(PKG) | sed 's/[^[:alnum:]\t]//g').cpu.prof MEMPROFILE=/tmp/$(shell basename $(PWD))$(shell echo $(PKG) | sed 's/[^[:alnum:]\t]//g').mem.prof diff --git a/XXXexamples_test.go b/XXXexamples_test.go index 218e9d5..d3e6f57 100644 --- a/XXXexamples_test.go +++ b/XXXexamples_test.go @@ -27,23 +27,46 @@ func ExampleNewBow() { } func ExampleNewBowFromColBasedInterfaces() { - colNames := []string{"time", "value", "valueFromJson"} + colNames := []string{"time", "value", "valueFromJSON"} colTypes := make([]Type, len(colNames)) colTypes[0] = Int64 - colData := [][]interface{}{ + colBasedData := [][]interface{}{ {1, 1.2, json.Number("3")}, {1, json.Number("1.2"), 3}, {json.Number("1.1"), 2, 1.3}, } - b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colData) + b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colBasedData) if err != nil { panic(err) } fmt.Println(b) // Output: - // time:int64 value:int64 valueFromJson:float64 + // time:int64 value:int64 valueFromJSON:float64 + // 1 1 1.1 + // 1 2 + // 3 3 1.3 + // metadata: [] +} + +func ExampleNewBowFromRowBasedInterfaces() { + colNames := []string{"time", "value", "valueFromJSON"} + colTypes := []Type{Int64, Int64, Float64} + rowBasedData := [][]interface{}{ + {1, 1, json.Number("1.1")}, + {1.2, json.Number("1.2"), 2}, + {json.Number("3"), 3, 1.3}, + } + + b, err := NewBowFromRowBasedInterfaces(colNames, colTypes, rowBasedData) + if err != nil { + panic(err) + } + + fmt.Println(b) + // Output: + // time:int64 value:int64 valueFromJSON:float64 // 1 1 1.1 // 1 2 // 3 3 1.3 @@ -51,16 +74,16 @@ func ExampleNewBowFromColBasedInterfaces() { } func ExampleBow_MarshalJSON() { - columns := []string{"time", "value", "valueFromJson"} - ts := make([]Type, len(columns)) - ts[0] = Int64 - cols := [][]interface{}{ + colNames := []string{"time", "value", "valueFromJSON"} + colTypes := make([]Type, len(colNames)) + colTypes[0] = Int64 + colBasedData := [][]interface{}{ {1, 1.2, json.Number("3")}, {1, json.Number("1.2"), 3}, {json.Number("1.1"), 2, 1.3}, } - b, err := NewBowFromColBasedInterfaces(columns, ts, cols) + b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colBasedData) if err != nil { panic(err) } @@ -72,7 +95,7 @@ func ExampleBow_MarshalJSON() { // pretty print json var out bytes.Buffer - if err := json.Indent(&out, js, "", "\t"); err != nil { + if err = json.Indent(&out, js, "", "\t"); err != nil { panic(err) } @@ -90,7 +113,7 @@ func ExampleBow_MarshalJSON() { // "type": "int64" // }, // { - // "name": "valueFromJson", + // "name": "valueFromJSON", // "type": "float64" // } // ] @@ -99,16 +122,16 @@ func ExampleBow_MarshalJSON() { // { // "time": 1, // "value": 1, - // "valueFromJson": 1.1 + // "valueFromJSON": 1.1 // }, // { // "time": 1, - // "valueFromJson": 2 + // "valueFromJSON": 2 // }, // { // "time": 3, // "value": 3, - // "valueFromJson": 1.3 + // "valueFromJSON": 1.3 // } // ] // } diff --git a/arrowtests/arrow.go b/arrowtests/arrow.go index 8723d46..3812d38 100644 --- a/arrowtests/arrow.go +++ b/arrowtests/arrow.go @@ -3,9 +3,9 @@ package arrowtests import ( "fmt" - "github.com/apache/arrow/go/arrow" - "github.com/apache/arrow/go/arrow/array" - "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/v7/arrow" + "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v7/arrow/memory" ) var ( @@ -25,7 +25,7 @@ type Event struct { } //NewTSRecord Create a new sample base on eventSchema -func NewTSRecord() (*arrow.Schema, array.Record) { +func NewTSRecord() (*arrow.Schema, arrow.Record) { pool := memory.NewGoAllocator() b := array.NewRecordBuilder(pool, EventSchema) defer b.Release() @@ -38,14 +38,14 @@ func NewTSRecord() (*arrow.Schema, array.Record) { } //PrintRecordColumns Print a columns based output -func PrintRecordColumns(rec array.Record) { +func PrintRecordColumns(rec arrow.Record) { for i, col := range rec.Columns() { fmt.Printf("column[%d] %q: %v\n", i, rec.ColumnName(i), col) } } //PrintRecordRows Print a row based output -func PrintRecordRows(schema *arrow.Schema, recs []array.Record) { +func PrintRecordRows(schema *arrow.Schema, recs []arrow.Record) { // Make a table read only based on many records table := array.NewTableFromRecords(schema, recs) defer table.Release() diff --git a/arrowtests/arrow_test.go b/arrowtests/arrow_test.go index c77a082..092afc7 100644 --- a/arrowtests/arrow_test.go +++ b/arrowtests/arrow_test.go @@ -1,6 +1,6 @@ package arrowtests -import "github.com/apache/arrow/go/arrow/array" +import "github.com/apache/arrow/go/v7/arrow" func ExamplePrintRecordColumns() { _, rec := NewTSRecord() @@ -18,7 +18,7 @@ func ExamplePrintRecordRows() { s, rec := NewTSRecord() defer rec.Release() - PrintRecordRows(s, []array.Record{rec}) + PrintRecordRows(s, []arrow.Record{rec}) // Output: //time: 1 , value: 7 , quality: 42 diff --git a/bow.go b/bow.go index d287a6c..6ee2e95 100644 --- a/bow.go +++ b/bow.go @@ -10,10 +10,9 @@ import ( "github.com/apache/arrow/go/v7/arrow/array" ) -// Bow is a wrapper of Apache Arrow array.Record interface. -// It was not implemented as a facade shadowing Arrow -// in order to expose low level Arrow decisions to Bow users -// while Arrow is in beta. +// Bow is wrapping the Apache Arrow arrow.Record interface, +// which is a collection of equal-length arrow.Array matching a particular arrow.Schema. +// Its purpose is to add convenience methods to easily manipulate dataframes. type Bow interface { String() string Schema() *arrow.Schema @@ -108,70 +107,70 @@ func NewBowEmpty() Bow { func NewBow(series ...Series) (Bow, error) { rec, err := newRecord(Metadata{}, series...) if err != nil { - return nil, fmt.Errorf("bow.NewBow: %w", err) + return nil, fmt.Errorf("newRecord: %w", err) } return &bow{Record: rec}, nil } // NewBowFromColBasedInterfaces returns a new Bow: -// - colNames contains the bow.Record fields names -// - colTypes contains the bow.Record fields data types, optional +// - colNames contains the Series names +// - colTypes contains the Series data types, optional // (if nil, the types will be automatically seeked) -// - colData contains the data to be stored in bow.Record -// (colNames and colData need to be of the same size) -func NewBowFromColBasedInterfaces(colNames []string, colTypes []Type, colData [][]interface{}) (Bow, error) { - if len(colNames) != len(colData) { - return nil, errors.New("bow.NewBowFromColBasedInterfaces: colNames and colData array lengths don't match") +// - colBasedData contains the data itself as a two-dimensional slice, +// with the first dimension being the columns +// (colNames and colBasedData need to be of the same size) +func NewBowFromColBasedInterfaces(colNames []string, colTypes []Type, colBasedData [][]interface{}) (Bow, error) { + if len(colNames) != len(colBasedData) { + return nil, errors.New("colNames and colBasedData slices lengths don't match") } if colTypes == nil { colTypes = make([]Type, len(colNames)) } else if len(colNames) != len(colTypes) { - return nil, errors.New("bow.NewBowFromColBasedInterfaces: colNames and colTypes array lengths don't match") + return nil, errors.New("colNames and colTypes slices lengths don't match") } - var err error - seriesSlice := make([]Series, len(colNames)) + series := make([]Series, len(colNames)) for i, colName := range colNames { - seriesSlice[i] = NewSeriesFromInterfaces(colName, colTypes[i], colData[i]) - if err != nil { - return nil, err - } + series[i] = NewSeriesFromInterfaces(colName, colTypes[i], colBasedData[i]) } - return NewBow(seriesSlice...) + return NewBow(series...) } -// NewBowFromRowBasedInterfaces returns a new Bow from row based data +// NewBowFromRowBasedInterfaces returns a new Bow: +// - colNames contains the Series names +// - colTypes contains the Series data types, required +// - rowBasedData contains the data itself as a two-dimensional slice, +// with the first dimension being the rows +// (colNames and rowBasedData need to be of the same size) func NewBowFromRowBasedInterfaces(colNames []string, colTypes []Type, rowBasedData [][]interface{}) (Bow, error) { if len(colNames) != len(colTypes) { - return nil, errors.New( - "bow.NewBowFromRowBasedInterfaces: mismatch between colNames and colTypes len") + return nil, errors.New("colNames and colTypes slices lengths don't match") } - bufSlice := make([]Buffer, len(colNames)) - for i := range bufSlice { - bufSlice[i] = NewBuffer(len(rowBasedData), colTypes[i]) + buffers := make([]Buffer, len(colNames)) + for i := range buffers { + buffers[i] = NewBuffer(len(rowBasedData), colTypes[i]) } for rowIndex, row := range rowBasedData { if len(row) != len(colNames) { - return nil, errors.New( - "bow.NewBowFromRowBasedInterfaces: mismatch between colNames and row lengths") + return nil, errors.New("colNames and row slices lengths don't match") } for colIndex := range colNames { - bufSlice[colIndex].SetOrDrop(rowIndex, row[colIndex]) + buffers[colIndex].SetOrDrop(rowIndex, row[colIndex]) } } - seriesSlice := make([]Series, len(colNames)) + series := make([]Series, len(colNames)) for i := range colNames { - seriesSlice[i] = NewSeriesFromBuffer(colNames[i], bufSlice[i]) + series[i] = NewSeriesFromBuffer(colNames[i], buffers[i]) } - return NewBow(seriesSlice...) + return NewBow(series...) } // NewEmptySlice returns an empty slice of the Bow @@ -184,7 +183,7 @@ func (b *bow) NewEmptySlice() Bow { func (b *bow) DropNils(colIndices ...int) (Bow, error) { selectedCols, err := selectCols(b, colIndices) if err != nil { - return nil, fmt.Errorf("bow.DropNils: %w", err) + return nil, err } var droppedRowIndices []int @@ -205,43 +204,23 @@ func (b *bow) DropNils(colIndices ...int) (Bow, error) { return b, nil } - bowSlice := make([]Bow, len(droppedRowIndices)+1) + bows := make([]Bow, len(droppedRowIndices)+1) var curr int for i, droppedRowIndex := range droppedRowIndices { - bowSlice[i] = b.NewSlice(curr, droppedRowIndex) + bows[i] = b.NewSlice(curr, droppedRowIndex) curr = droppedRowIndex + 1 } - bowSlice[len(droppedRowIndices)] = b.NewSlice(curr, b.NumRows()) - - return AppendBows(bowSlice...) -} - -// GetRowsChan returns a chan of rows as map[string]interface{} -func (b *bow) GetRowsChan() <-chan map[string]interface{} { - rows := make(chan map[string]interface{}) - go b.getRowsChan(rows) + bows[len(droppedRowIndices)] = b.NewSlice(curr, b.NumRows()) - return rows + return AppendBows(bows...) } -func (b *bow) getRowsChan(rows chan map[string]interface{}) { - defer close(rows) - - if b.Record == nil || b.NumRows() == 0 { - return - } - - for rowIndex := 0; rowIndex < b.NumRows(); rowIndex++ { - rows <- b.GetRow(rowIndex) - } -} - -// Equal returns true if the two Bow are equal: their record, schema and metadata should be equal. +// Equal returns true if the two Bow are equal: their Record, Schema and Metadata should be equal. func (b *bow) Equal(other Bow) bool { b2, ok := other.(*bow) if !ok { - panic("bow.Equal: 'other' isn't a bow object") + panic("'other' isn't a bow object") } if b.Record == nil && b2.Record == nil { @@ -288,7 +267,7 @@ func (b *bow) Equal(other Bow) bool { return true } -// NewSlice returns a new Bow with a zero-copy slice of the Bow arrow.Record. +// NewSlice returns a new Bow with a zero-copy slice of the arrow.Record. // i and j being the minimum and maximum rows respectively. func (b *bow) NewSlice(i, j int) Bow { return &bow{ @@ -307,17 +286,17 @@ func (b *bow) Select(colIndices ...int) (Bow, error) { return nil, err } - var seriesSlice []Series + var series []Series for colIndex := range b.Schema().Fields() { if selectedCols[colIndex] { - seriesSlice = append(seriesSlice, b.NewSeriesFromCol(colIndex)) + series = append(series, b.NewSeriesFromCol(colIndex)) } } - return NewBowWithMetadata(b.Metadata(), seriesSlice...) + return NewBowWithMetadata(b.Metadata(), series...) } -// NumRows returns the number of rows in the Bow +// NumRows returns the number of rows in the Bow. func (b *bow) NumRows() int { if b.Record == nil { return 0 @@ -326,7 +305,7 @@ func (b *bow) NumRows() int { return int(b.Record.NumRows()) } -// NumCols returns the number of columns in the Bow +// NumCols returns the number of columns in the Bow. func (b *bow) NumCols() int { if b.Record == nil { return 0 @@ -335,32 +314,29 @@ func (b *bow) NumCols() int { return int(b.Record.NumCols()) } -// AddCols returns a copy of the Bow with extra columns from the `seriesSlice`. -func (b *bow) AddCols(seriesSlice ...Series) (Bow, error) { - if len(seriesSlice) == 0 { - return b, nil - } - - addedColNames := make(map[string]*interface{}, b.NumCols()+len(seriesSlice)) - newSeriesSlice := make([]Series, b.NumCols()+len(seriesSlice)) +// AddCols returns a copy of the Bow with extra columns from the `series`. +func (b *bow) AddCols(series ...Series) (Bow, error) { + addedColNames := make(map[string]*interface{}, b.NumCols()+len(series)) + newSeries := make([]Series, b.NumCols()+len(series)) for colIndex, col := range b.Schema().Fields() { - newSeriesSlice[colIndex] = b.NewSeriesFromCol(colIndex) + newSeries[colIndex] = b.NewSeriesFromCol(colIndex) addedColNames[col.Name] = nil } - for i, s := range seriesSlice { + for i, s := range series { _, ok := addedColNames[s.Name] if ok { - return nil, fmt.Errorf("bow.AddCols: column %q already exists", s.Name) + return nil, fmt.Errorf("column %q already exists", s.Name) } - newSeriesSlice[b.NumCols()+i] = s + newSeries[b.NumCols()+i] = s addedColNames[s.Name] = nil } - return NewBowWithMetadata(b.Metadata(), newSeriesSlice...) + return NewBowWithMetadata(b.Metadata(), newSeries...) } +// NewSeriesFromCol returns a Series from the column `colIndex`. func (b *bow) NewSeriesFromCol(colIndex int) Series { return Series{ Name: b.ColumnName(colIndex), diff --git a/bowappend.gen.go b/bowappend.gen.go index 01280eb..35e83eb 100644 --- a/bowappend.gen.go +++ b/bowappend.gen.go @@ -4,6 +4,7 @@ package bow import ( "fmt" + "github.com/apache/arrow/go/v7/arrow" "github.com/apache/arrow/go/v7/arrow/array" "github.com/apache/arrow/go/v7/arrow/memory" @@ -27,7 +28,7 @@ func AppendBows(bows ...Bow) (Bow, error) { } refBow := bows[0] - seriesSlice := make([]Series, refBow.NumCols()) + series := make([]Series, refBow.NumCols()) mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) for colIndex := 0; colIndex < refBow.NumCols(); colIndex++ { @@ -98,11 +99,11 @@ func AppendBows(bows ...Bow) (Bow, error) { return nil, fmt.Errorf("unsupported type %v", refType) } - seriesSlice[colIndex] = Series{ + series[colIndex] = Series{ Name: refBow.ColumnName(colIndex), Array: newArray, } } - return NewBowWithMetadata(refBow.Metadata(), seriesSlice...) + return NewBowWithMetadata(refBow.Metadata(), series...) } diff --git a/bowappend.gen.go.tmpl b/bowappend.gen.go.tmpl index 15ef2eb..1b8e30d 100644 --- a/bowappend.gen.go.tmpl +++ b/bowappend.gen.go.tmpl @@ -2,6 +2,7 @@ package bow import ( "fmt" + "github.com/apache/arrow/go/v7/arrow" "github.com/apache/arrow/go/v7/arrow/array" "github.com/apache/arrow/go/v7/arrow/memory" @@ -25,7 +26,7 @@ func AppendBows(bows ...Bow) (Bow, error) { } refBow := bows[0] - seriesSlice := make([]Series, refBow.NumCols()) + series := make([]Series, refBow.NumCols()) mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) for colIndex := 0; colIndex < refBow.NumCols(); colIndex++ { @@ -53,11 +54,11 @@ func AppendBows(bows ...Bow) (Bow, error) { return nil, fmt.Errorf("unsupported type %v", refType) } - seriesSlice[colIndex] = Series{ + series[colIndex] = Series{ Name: refBow.ColumnName(colIndex), Array: newArray, } } - return NewBowWithMetadata(refBow.Metadata(), seriesSlice...) + return NewBowWithMetadata(refBow.Metadata(), series...) } diff --git a/bowbuffer.gen.go b/bowbuffer.gen.go index 3e59ee3..bdca072 100644 --- a/bowbuffer.gen.go +++ b/bowbuffer.gen.go @@ -9,6 +9,7 @@ import ( "github.com/apache/arrow/go/v7/arrow/bitutil" ) +// NewBuffer returns a new Buffer of size `size` and Type `typ`. func NewBuffer(size int, typ Type) Buffer { switch typ { case Int64: @@ -36,6 +37,7 @@ func NewBuffer(size int, typ Type) Buffer { } } +// NewBufferFromData returns from `data`, which has to be a slice of a supported type. func NewBufferFromData(data interface{}) Buffer { var l int switch data.(type) { @@ -52,6 +54,7 @@ func NewBufferFromData(data interface{}) Buffer { } } +// Len returns the length of the Buffer func (b Buffer) Len() int { switch data := b.Data.(type) { case []int64: @@ -67,6 +70,8 @@ func (b Buffer) Len() int { } } +// SetOrDrop sets the value `value` at index `i` by attempting a type conversion to the Buffer Type. +// Set the bit in the Buffer nullBitmapBytes if the conversion succeeded, or clear it otherwise. func (b *Buffer) SetOrDrop(i int, value interface{}) { var valid bool switch v := b.Data.(type) { @@ -89,6 +94,8 @@ func (b *Buffer) SetOrDrop(i int, value interface{}) { } } +// SetOrDrop sets the value `value` at index `i` by attempting a type assertion to the Buffer Type. +// Set the bit in the Buffer nullBitmapBytes if the type assertion succeeded, or clear it otherwise. func (b *Buffer) SetOrDropStrict(i int, value interface{}) { var valid bool switch v := b.Data.(type) { @@ -111,6 +118,7 @@ func (b *Buffer) SetOrDropStrict(i int, value interface{}) { } } +// GetValue gets the value at index `i` from the Buffer func (b *Buffer) GetValue(i int) interface{} { if bitutil.BitIsNotSet(b.nullBitmapBytes, i) { return nil @@ -129,6 +137,7 @@ func (b *Buffer) GetValue(i int) interface{} { } } +// Less returns whether the value at index `i` is less that the value at index `j`. func (b Buffer) Less(i, j int) bool { switch v := b.Data.(type) { case []int64: @@ -144,6 +153,7 @@ func (b Buffer) Less(i, j int) bool { } } +// NewBufferFromCol returns a new Buffer created from the column at index `colIndex`. func (b *bow) NewBufferFromCol(colIndex int) Buffer { data := b.Column(colIndex).Data() switch b.ColumnType(colIndex) { diff --git a/bowbuffer.gen.go.tmpl b/bowbuffer.gen.go.tmpl index 358bd20..b874b0f 100644 --- a/bowbuffer.gen.go.tmpl +++ b/bowbuffer.gen.go.tmpl @@ -7,6 +7,7 @@ import ( "github.com/apache/arrow/go/v7/arrow/bitutil" ) +// NewBuffer returns a new Buffer of size `size` and Type `typ`. func NewBuffer(size int, typ Type) Buffer { switch typ { {{range .Data.types -}} @@ -21,6 +22,7 @@ func NewBuffer(size int, typ Type) Buffer { } } +// NewBufferFromData returns from `data`, which has to be a slice of a supported type. func NewBufferFromData(data interface{}) Buffer { var l int switch data.(type) { @@ -36,6 +38,7 @@ func NewBufferFromData(data interface{}) Buffer { } } +// Len returns the length of the Buffer func (b Buffer) Len() int { switch data := b.Data.(type) { {{range .Data.types -}} @@ -47,6 +50,8 @@ func (b Buffer) Len() int { } } +// SetOrDrop sets the value `value` at index `i` by attempting a type conversion to the Buffer Type. +// Set the bit in the Buffer nullBitmapBytes if the conversion succeeded, or clear it otherwise. func (b *Buffer) SetOrDrop(i int, value interface{}) { var valid bool switch v := b.Data.(type) { @@ -65,6 +70,8 @@ func (b *Buffer) SetOrDrop(i int, value interface{}) { } } +// SetOrDrop sets the value `value` at index `i` by attempting a type assertion to the Buffer Type. +// Set the bit in the Buffer nullBitmapBytes if the type assertion succeeded, or clear it otherwise. func (b *Buffer) SetOrDropStrict(i int, value interface{}) { var valid bool switch v := b.Data.(type) { @@ -83,6 +90,7 @@ func (b *Buffer) SetOrDropStrict(i int, value interface{}) { } } +// GetValue gets the value at index `i` from the Buffer func (b *Buffer) GetValue(i int) interface{} { if bitutil.BitIsNotSet(b.nullBitmapBytes, i) { return nil @@ -97,6 +105,7 @@ func (b *Buffer) GetValue(i int) interface{} { } } +// Less returns whether the value at index `i` is less that the value at index `j`. func (b Buffer) Less(i, j int) bool { switch v := b.Data.(type) { {{range .Data.types -}} @@ -112,6 +121,7 @@ func (b Buffer) Less(i, j int) bool { } } +// NewBufferFromCol returns a new Buffer created from the column at index `colIndex`. func (b *bow) NewBufferFromCol(colIndex int) Buffer { data := b.Column(colIndex).Data() switch b.ColumnType(colIndex) { diff --git a/bowbuffer.go b/bowbuffer.go index 26dd72a..95f735c 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -7,6 +7,9 @@ import ( "github.com/apache/arrow/go/v7/arrow/bitutil" ) +// Buffer is a mutable data structure with the purpose of easily building data Series with: +// - Data: slice of data +// - nullBitmapBytes: slice of bytes representing type Buffer struct { Data interface{} nullBitmapBytes []byte @@ -44,24 +47,29 @@ func buildNullBitmapBytes(dataLength int, validityArray interface{}) []byte { return res } -func NewBufferFromInterfaces(typ Type, cells []interface{}) (Buffer, error) { - buf := NewBuffer(len(cells), typ) - for i, c := range cells { +// NewBufferFromInterfaces returns a new Buffer of type `typ` with the data represented as a slice of interface{}, with eventual nil values. +func NewBufferFromInterfaces(typ Type, data []interface{}) (Buffer, error) { + buf := NewBuffer(len(data), typ) + for i, c := range data { buf.SetOrDrop(i, c) } return buf, nil } +// IsValid return true if the value at row `rowIndex` is valid. func (b Buffer) IsValid(rowIndex int) bool { return bitutil.BitIsSet(b.nullBitmapBytes, rowIndex) } +// IsNull return true if the value at row `rowIndex` is nil. func (b Buffer) IsNull(rowIndex int) bool { return bitutil.BitIsNotSet(b.nullBitmapBytes, rowIndex) } +// IsSorted returns true if the values of the Buffer are sorted in ascending order. func (b Buffer) IsSorted() bool { return sort.IsSorted(b) } +// Swap swaps the values of the Buffer at indices i and j. func (b Buffer) Swap(i, j int) { v1, v2 := b.GetValue(i), b.GetValue(j) b.SetOrDropStrict(i, v2) diff --git a/bowconvert.go b/bowconvert.go index a9b75f7..c2797fd 100644 --- a/bowconvert.go +++ b/bowconvert.go @@ -6,125 +6,130 @@ import ( "strconv" ) -func ToInt64(i interface{}) (int64, bool) { - switch v := i.(type) { +// ToInt64 attempts to convert `input` to int64. +// Return also a false boolean if the conversion failed. +func ToInt64(input interface{}) (output int64, ok bool) { + switch value := input.(type) { case json.Number: - val, err := v.Int64() - return val, err == nil + output, err := value.Int64() + return output, err == nil case int: - return int64(v), true + return int64(value), true case int8: - return int64(v), true + return int64(value), true case int16: - return int64(v), true + return int64(value), true case int32: - return int64(v), true + return int64(value), true case int64: - return v, true + return value, true case float32: - return int64(v), true + return int64(value), true case float64: - return int64(v), true + return int64(value), true case bool: - if v { + if value { return 1, true } return 0, true case string: - val, err := strconv.ParseInt(v, 10, 64) - return val, err == nil - default: - return 0, false + output, err := strconv.ParseInt(value, 10, 64) + return output, err == nil } + return } -func ToFloat64(i interface{}) (float64, bool) { - switch v := i.(type) { +// ToFloat64 attempts to convert `input` to float64. +// Return also a false boolean if the conversion failed. +func ToFloat64(input interface{}) (output float64, ok bool) { + switch input := input.(type) { case float64: - return v, true + return input, true case json.Number: - val, err := v.Float64() - return val, err == nil + output, err := input.Float64() + return output, err == nil case int: - return float64(v), true + return float64(input), true case int8: - return float64(v), true + return float64(input), true case int16: - return float64(v), true + return float64(input), true case int32: - return float64(v), true + return float64(input), true case int64: - return float64(v), true + return float64(input), true case float32: - return float64(v), true + return float64(input), true case bool: - if v { + if input { return 1., true } return 0., true case string: - val, err := strconv.ParseFloat(v, 64) - return val, err == nil - default: - return 0, false + output, err := strconv.ParseFloat(input, 64) + return output, err == nil } + return } -func ToBoolean(i interface{}) (bool, bool) { - switch v := i.(type) { +// ToBoolean attempts to convert `input` to bool. +// Return also a false boolean if the conversion failed. +// In case of numeric type, returns true if the value is non-zero. +func ToBoolean(input interface{}) (output bool, ok bool) { + switch input := input.(type) { case bool: - return v, true + return input, true case string: - val, err := strconv.ParseBool(v) - return val, err == nil + output, err := strconv.ParseBool(input) + return output, err == nil case json.Number: - val, err := v.Float64() - return val != 0., err != nil + output, err := input.Float64() + return output != 0., err != nil case int: - return v != 0, true + return input != 0, true case int8: - return v != 0, true + return input != 0, true case int16: - return v != 0, true + return input != 0, true case int32: - return v != 0, true + return input != 0, true case int64: - return v != 0, true + return input != 0, true case float32: - return v != 0, true + return input != 0., true case float64: - return v != 0, true - default: - return false, false + return input != 0., true } + return } -func ToString(i interface{}) (string, bool) { - switch v := i.(type) { +// ToString attempts to convert `input` to string. +// Return also a false boolean if the conversion failed. +func ToString(input interface{}) (output string, ok bool) { + switch input := input.(type) { case bool: - if v { + if input { return "true", true } return "false", true case string: - return v, true + return input, true case json.Number: - return v.String(), true + return input.String(), true case int: - return strconv.Itoa(v), true + return strconv.Itoa(input), true case int8: - return strconv.Itoa(int(v)), true + return strconv.Itoa(int(input)), true case int16: - return strconv.Itoa(int(v)), true + return strconv.Itoa(int(input)), true case int32: - return strconv.Itoa(int(v)), true + return strconv.Itoa(int(input)), true case int64: - return strconv.Itoa(int(v)), true + return strconv.Itoa(int(input)), true case float32: - return fmt.Sprintf("%f", v), true + return fmt.Sprintf("%f", input), true case float64: - return fmt.Sprintf("%f", v), true - default: - return "", false + return fmt.Sprintf("%f", input), true } + return } diff --git a/bowdiff.go b/bowdiff.go index 05f2213..6beed6f 100644 --- a/bowdiff.go +++ b/bowdiff.go @@ -12,7 +12,7 @@ import ( func (b *bow) Diff(colIndices ...int) (Bow, error) { selectedCols, err := selectCols(b, colIndices) if err != nil { - return nil, fmt.Errorf("bow.Diff: %w", err) + return nil, err } for colIndex, col := range b.Schema().Fields() { @@ -22,7 +22,7 @@ func (b *bow) Diff(colIndices ...int) (Bow, error) { case Boolean: default: return nil, fmt.Errorf( - "bow.Diff: column '%s' is of unsupported type '%v'", + "column '%s' is of unsupported type '%v'", col.Name, b.ColumnType(colIndex)) } } diff --git a/bowfill.go b/bowfill.go index 1bc51e7..264530b 100644 --- a/bowfill.go +++ b/bowfill.go @@ -13,22 +13,22 @@ import ( // Fills only int64 and float64 types. func (b *bow) FillLinear(refColIndex, toFillColIndex int) (Bow, error) { if refColIndex < 0 || refColIndex > b.NumCols()-1 { - return nil, fmt.Errorf("bow.FillLinear: refColIndex is out of range") + return nil, fmt.Errorf("refColIndex is out of range") } if toFillColIndex < 0 || toFillColIndex > b.NumCols()-1 { - return nil, fmt.Errorf("bow.FillLinear: toFillColIndex is out of range") + return nil, fmt.Errorf("toFillColIndex is out of range") } if refColIndex == toFillColIndex { - return nil, fmt.Errorf("bow.FillLinear: refColIndex and toFillColIndex are equal") + return nil, fmt.Errorf("refColIndex and toFillColIndex are equal") } switch b.ColumnType(refColIndex) { case Int64: case Float64: default: - return nil, fmt.Errorf("bow.FillLinear: refColIndex '%d' is of type '%s'", + return nil, fmt.Errorf("refColIndex '%d' is of type '%s'", refColIndex, b.ColumnType(refColIndex)) } @@ -37,7 +37,7 @@ func (b *bow) FillLinear(refColIndex, toFillColIndex int) (Bow, error) { } if !b.IsColSorted(refColIndex) { - return nil, fmt.Errorf("bow.FillLinear: refColIndex '%d' is empty or not sorted", + return nil, fmt.Errorf("refColIndex '%d' is empty or not sorted", refColIndex) } @@ -46,7 +46,7 @@ func (b *bow) FillLinear(refColIndex, toFillColIndex int) (Bow, error) { case Float64: default: return nil, fmt.Errorf( - "bow.FillLinear: toFillColIndex '%d' is of unsupported type '%s'", + "toFillColIndex '%d' is of unsupported type '%s'", toFillColIndex, b.ColumnType(toFillColIndex)) } @@ -108,7 +108,7 @@ func (b *bow) FillLinear(refColIndex, toFillColIndex int) (Bow, error) { func (b *bow) FillMean(colIndices ...int) (Bow, error) { toFillCols, err := selectCols(b, colIndices) if err != nil { - return nil, fmt.Errorf("bow.FillMean: %w", err) + return nil, err } for colIndex, col := range b.Schema().Fields() { @@ -118,7 +118,7 @@ func (b *bow) FillMean(colIndices ...int) (Bow, error) { case Float64: default: return nil, fmt.Errorf( - "bow.FillMean: column '%s' is of unsupported type '%s'", + "column '%s' is of unsupported type '%s'", col.Name, b.ColumnType(colIndex)) } } @@ -177,7 +177,7 @@ func (b *bow) FillPrevious(colIndices ...int) (Bow, error) { func fill(method string, b *bow, colIndices ...int) (Bow, error) { toFillCols, err := selectCols(b, colIndices) if err != nil { - return nil, fmt.Errorf("bow.Fill%s: %w", method, err) + return nil, err } var wg sync.WaitGroup @@ -279,7 +279,7 @@ func selectCols(b *bow, colIndices []int) ([]bool, error) { for _, colIndex := range colIndices { if colIndex < 0 || colIndex > b.NumCols()-1 { - return nil, fmt.Errorf("selectCols: out of range colIndex '%d'", colIndex) + return nil, fmt.Errorf("selectCols: colIndex '%d' out of range", colIndex) } selectedCols[colIndex] = true } diff --git a/bowfind.go b/bowfind.go index 5bb5bd9..99d4e4a 100644 --- a/bowfind.go +++ b/bowfind.go @@ -1,14 +1,17 @@ package bow -func (b *bow) Find(columnIndex int, value interface{}) int { - return b.FindNext(columnIndex, 0, value) +// Find returns the index of the row where `value` is found in the `colIndex` column. +// Returns -1 if the value is not found. +func (b *bow) Find(colIndex int, value interface{}) int { + return b.FindNext(colIndex, 0, value) } -func (b *bow) FindNext(columnIndex, rowIndex int, value interface{}) int { +// FindNext returns the index of the row where `value` is found in the `colIndex` column, starting from the `rowIndex` row. +// Returns -1 if the value is not found. +func (b *bow) FindNext(colIndex, rowIndex int, value interface{}) int { if value == nil { - col := b.Column(columnIndex) for i := 0; i < b.NumRows(); i++ { - if !col.IsValid(i) { + if !b.Column(colIndex).IsValid(i) { return i } } @@ -16,13 +19,14 @@ func (b *bow) FindNext(columnIndex, rowIndex int, value interface{}) int { } for i := rowIndex; i < b.NumRows(); i++ { - if value == b.GetValue(columnIndex, i) { + if value == b.GetValue(colIndex, i) { return i } } return -1 } -func (b *bow) Contains(columnIndex int, value interface{}) bool { - return b.Find(columnIndex, value) != -1 +// Contains returns whether `value` is found in `colIndex` columns. +func (b *bow) Contains(colIndex int, value interface{}) bool { + return b.Find(colIndex, value) != -1 } diff --git a/bowgenerator.go b/bowgenerator.go index b96cf9a..7839829 100644 --- a/bowgenerator.go +++ b/bowgenerator.go @@ -8,10 +8,14 @@ import ( "github.com/google/uuid" ) -const ( - genDefaultNumRows = 3 -) - +const genDefaultNumRows = 3 + +// GenSeriesOptions are options to generate random Series: +// - NumRows: number of rows of the resulting Series +// - Name: name of the Series +// - Type: data type of the Series +// - GenStrategy: strategy of data generation +// - MissingData: sets whether the Series includes random nil values type GenSeriesOptions struct { NumRows int Name string @@ -20,9 +24,9 @@ type GenSeriesOptions struct { MissingData bool } -// NewGenBow generates a new random bow +// NewGenBow generates a new random Bow with `numRows` rows and eventual GenSeriesOptions. func NewGenBow(numRows int, options ...GenSeriesOptions) (Bow, error) { - seriesSlice := make([]Series, len(options)) + series := make([]Series, len(options)) nameMap := make(map[string]struct{}) for i, o := range options { o.NumRows = numRows @@ -31,12 +35,13 @@ func NewGenBow(numRows int, options ...GenSeriesOptions) (Bow, error) { o.Name = fmt.Sprintf("%s_%d", o.Name, i) } nameMap[o.Name] = struct{}{} - seriesSlice[i] = o.genSeries() + series[i] = o.genSeries() } - return NewBow(seriesSlice...) + return NewBow(series...) } +// NewGenSeries returns a new randomly generated Series. func NewGenSeries(o GenSeriesOptions) Series { o.validate() return o.genSeries() @@ -70,20 +75,25 @@ func (o *GenSeriesOptions) genSeries() Series { return NewSeriesFromBuffer(o.Name, buf) } +// GenStrategy defines how random values are generated. type GenStrategy func(typ Type, seed int) interface{} +// GenStrategyRandom generates a random number of type `typ`. func GenStrategyRandom(typ Type, seed int) interface{} { return newRandomNumber(typ) } +// GenStrategyIncremental generates a number of type `typ` equal to the converted `seed` value. func GenStrategyIncremental(typ Type, seed int) interface{} { return typ.Convert(seed) } +// GenStrategyDecremental generates a number of type `typ` equal to the opposite of the converted `seed` value. func GenStrategyDecremental(typ Type, seed int) interface{} { return typ.Convert(-seed) } +// GenStrategyRandomIncremental generates a random number of type `typ` by using the `seed` value. func GenStrategyRandomIncremental(typ Type, seed int) interface{} { i := int64(seed) * 10 switch typ { @@ -96,6 +106,7 @@ func GenStrategyRandomIncremental(typ Type, seed int) interface{} { } } +// GenStrategyRandomDecremental generates a random number of type `typ` by using the `seed` value. func GenStrategyRandomDecremental(typ Type, seed int) interface{} { i := -int64(seed) * 10 switch typ { diff --git a/bowgenerator_test.go b/bowgenerator_test.go index 9d4c5dc..d3f82ac 100644 --- a/bowgenerator_test.go +++ b/bowgenerator_test.go @@ -22,7 +22,7 @@ func TestGenerator(t *testing.T) { }) t.Run("with missing data", func(t *testing.T) { - b, err := NewGenBow(1000000, GenSeriesOptions{MissingData: true}) + b, err := NewGenBow(100, GenSeriesOptions{MissingData: true}) assert.NoError(t, err) b2, err := b.DropNils() diff --git a/bowgetters.go b/bowgetters.go index 01fe9f3..58eb89d 100644 --- a/bowgetters.go +++ b/bowgetters.go @@ -8,6 +8,7 @@ import ( "github.com/apache/arrow/go/v7/arrow/array" ) +// GetRow returns the row `rowIndex`. Map keys represent column names. func (b *bow) GetRow(rowIndex int) map[string]interface{} { row := map[string]interface{}{} for colIndex := 0; colIndex < b.NumCols(); colIndex++ { @@ -21,6 +22,27 @@ func (b *bow) GetRow(rowIndex int) map[string]interface{} { return row } +// GetRowsChan returns a chan of all the rows. Map keys represent column names. +func (b *bow) GetRowsChan() <-chan map[string]interface{} { + rows := make(chan map[string]interface{}) + go b.getRowsChan(rows) + + return rows +} + +func (b *bow) getRowsChan(rows chan map[string]interface{}) { + defer close(rows) + + if b.Record == nil || b.NumRows() == 0 { + return + } + + for rowIndex := 0; rowIndex < b.NumRows(); rowIndex++ { + rows <- b.GetRow(rowIndex) + } +} + +// GetValue returns the value at row `rowIndex` and column `colIndex'. func (b *bow) GetValue(colIndex, rowIndex int) interface{} { if b.Column(colIndex).IsNull(rowIndex) { return nil @@ -40,56 +62,36 @@ func (b *bow) GetValue(colIndex, rowIndex int) interface{} { } } -func (b *bow) GetNextValue(colIndex, rowIndex int) (interface{}, int) { +// GetPrevValue returns the value and the index of the previous non-nil value in the column `colIndex`, starting from the row `rowIndex`. +// Returns nil and -1 if no value has been found. +func (b *bow) GetPrevValue(colIndex, rowIndex int) (interface{}, int) { for rowIndex >= 0 && rowIndex < b.NumRows() { value := b.GetValue(colIndex, rowIndex) if value != nil { return value, rowIndex } - rowIndex++ + rowIndex-- } return nil, -1 } -func (b *bow) GetNextValues(colIndex1, colIndex2, rowIndex int) (interface{}, interface{}, int) { - for rowIndex >= 0 && rowIndex < b.NumRows() { - var v1 interface{} - v1, rowIndex = b.GetNextValue(colIndex1, rowIndex) - v2, rowIndex2 := b.GetNextValue(colIndex2, rowIndex) - if rowIndex == rowIndex2 { - return v1, v2, rowIndex - } - rowIndex++ - } - - return nil, nil, -1 -} - -func (b *bow) GetNextRowIndex(colIndex, rowIndex int) int { - col := b.Column(colIndex) - for rowIndex >= 0 && rowIndex < b.NumRows() { - if col.IsValid(rowIndex) { - return rowIndex - } - rowIndex++ - } - - return -1 -} - -func (b *bow) GetPrevValue(colIndex, rowIndex int) (interface{}, int) { +// GetNextValue returns the value and the index of the next non-nil value in the column `colIndex`, starting from the row `rowIndex`. +// Returns nil and -1 if no value has been found. +func (b *bow) GetNextValue(colIndex, rowIndex int) (interface{}, int) { for rowIndex >= 0 && rowIndex < b.NumRows() { value := b.GetValue(colIndex, rowIndex) if value != nil { return value, rowIndex } - rowIndex-- + rowIndex++ } return nil, -1 } +// GetPrevValues returns the two values and the index of the previous non-nil values in columns `colIndex1` and `colIndex2`, starting from row `rowIndex`. +// Returns nil, nil and -1 if no value has been found. func (b *bow) GetPrevValues(colIndex1, colIndex2, rowIndex int) (interface{}, interface{}, int) { for rowIndex >= 0 && rowIndex < b.NumRows() { var v1 interface{} @@ -104,6 +106,24 @@ func (b *bow) GetPrevValues(colIndex1, colIndex2, rowIndex int) (interface{}, in return nil, nil, -1 } +// GetNextValues returns the two values and the index of the next non-nil values in columns `colIndex1` and `colIndex2`, starting from row `rowIndex`. +// Returns nil, nil and -1 if no value has been found. +func (b *bow) GetNextValues(colIndex1, colIndex2, rowIndex int) (interface{}, interface{}, int) { + for rowIndex >= 0 && rowIndex < b.NumRows() { + var v1 interface{} + v1, rowIndex = b.GetNextValue(colIndex1, rowIndex) + v2, rowIndex2 := b.GetNextValue(colIndex2, rowIndex) + if rowIndex == rowIndex2 { + return v1, v2, rowIndex + } + rowIndex++ + } + + return nil, nil, -1 +} + +// GetPrevRowIndex returns the index of the previous non-nil value in the column `colIndex`, starting from row `rowIndex`. +// Returns -1 if no value has been found. func (b *bow) GetPrevRowIndex(colIndex, rowIndex int) int { col := b.Column(colIndex) for rowIndex >= 0 && rowIndex < b.NumRows() { @@ -116,6 +136,22 @@ func (b *bow) GetPrevRowIndex(colIndex, rowIndex int) int { return -1 } +// GetNextRowIndex returns the index of the next non-nil value in the column `colIndex`, starting from row `rowIndex`. +// Returns -1 if no value has been found. +func (b *bow) GetNextRowIndex(colIndex, rowIndex int) int { + col := b.Column(colIndex) + for rowIndex >= 0 && rowIndex < b.NumRows() { + if col.IsValid(rowIndex) { + return rowIndex + } + rowIndex++ + } + + return -1 +} + +// GetInt64 returns the value as int64 from the column `colIndex` and row `rowindex`, and a bool whether the value is nil or not. +// Attempts to convert the value if the type of the column is not arrow.INT64. func (b *bow) GetInt64(colIndex, rowIndex int) (int64, bool) { if rowIndex < 0 || rowIndex >= b.NumRows() { return 0, false @@ -147,30 +183,38 @@ func (b *bow) GetInt64(colIndex, rowIndex int) (int64, bool) { } } -func (b *bow) GetNextInt64(colIndex, rowIndex int) (int64, int) { +// GetPrevInt64 returns the previous non-nil value as int64 and its row index from the column `colIndex`, starting from row `rowindex`. +// Attempts to convert the value if the type of the column is not arrow.INT64. +// Returns 0 and -1 in case no value is found. +func (b *bow) GetPrevInt64(colIndex, rowIndex int) (int64, int) { for rowIndex >= 0 && rowIndex < b.NumRows() { value, ok := b.GetInt64(colIndex, rowIndex) if ok { return value, rowIndex } - rowIndex++ + rowIndex-- } - return 0., -1 + return 0, -1 } -func (b *bow) GetPrevInt64(colIndex, rowIndex int) (int64, int) { +// GetNextInt64 returns the next non-nil value as int64 and its row index from the column `colIndex`, starting from row `rowindex`. +// Attempts to convert the value if the type of the column is not arrow.INT64. +// Returns 0 and -1 in case no value is found. +func (b *bow) GetNextInt64(colIndex, rowIndex int) (int64, int) { for rowIndex >= 0 && rowIndex < b.NumRows() { value, ok := b.GetInt64(colIndex, rowIndex) if ok { return value, rowIndex } - rowIndex-- + rowIndex++ } - return 0., -1 + return 0, -1 } +// GetFloat64 returns the value as float64 from the column `colIndex` and row `rowindex`, and a bool whether the value is nil or not. +// Attempts to convert the value if the type of the column is not arrow.FLOAT64. func (b *bow) GetFloat64(colIndex, rowIndex int) (float64, bool) { if rowIndex < 0 || rowIndex >= b.NumRows() { return 0., false @@ -202,20 +246,24 @@ func (b *bow) GetFloat64(colIndex, rowIndex int) (float64, bool) { } } -func (b *bow) GetNextFloat64s(colIndex1, colIndex2, rowIndex int) (float64, float64, int) { +// GetPrevFloat64 returns the previous non-nil value as float64 and its row index from the column `colIndex`, starting from row `rowindex`. +// Attempts to convert the value if the type of the column is not arrow.FLOAT64. +// Returns 0. and -1 in case no value is found. +func (b *bow) GetPrevFloat64(colIndex, rowIndex int) (float64, int) { for rowIndex >= 0 && rowIndex < b.NumRows() { - var v1 float64 - v1, rowIndex = b.GetNextFloat64(colIndex1, rowIndex) - v2, rowIndex2 := b.GetNextFloat64(colIndex2, rowIndex) - if rowIndex == rowIndex2 { - return v1, v2, rowIndex + value, ok := b.GetFloat64(colIndex, rowIndex) + if ok { + return value, rowIndex } - rowIndex++ + rowIndex-- } - return 0., 0., -1 + return 0., -1 } +// GetNextFloat64 returns the next non-nil value as float64 and its row index from the column `colIndex`, starting from row `rowindex`. +// Attempts to convert the value if the type of the column is not arrow.FLOAT64. +// Returns 0. and -1 in case no value is found. func (b *bow) GetNextFloat64(colIndex, rowIndex int) (float64, int) { for rowIndex >= 0 && rowIndex < b.NumRows() { value, ok := b.GetFloat64(colIndex, rowIndex) @@ -228,6 +276,9 @@ func (b *bow) GetNextFloat64(colIndex, rowIndex int) (float64, int) { return 0., -1 } +// GetPrevFloat64s returns the previous non-nil two values as float64 and their row index from columns `colIndex1` and `colIndex2`, starting from row `rowindex`. +// Attempts to convert the values if the type of the columns are not arrow.FLOAT64. +// Returns 0., 0. and -1 in case no index is found with two non-nil values. func (b *bow) GetPrevFloat64s(colIndex1, colIndex2, rowIndex int) (float64, float64, int) { for rowIndex >= 0 && rowIndex < b.NumRows() { var v1 float64 @@ -242,22 +293,30 @@ func (b *bow) GetPrevFloat64s(colIndex1, colIndex2, rowIndex int) (float64, floa return 0., 0., -1 } -func (b *bow) GetPrevFloat64(colIndex, rowIndex int) (float64, int) { +// GetNextFloat64s returns the next non-nil two values as float64 and their row index from columns `colIndex1` and `colIndex2`, starting from row `rowindex`. +// Attempts to convert the values if the type of the columns are not arrow.FLOAT64. +// Returns 0., 0. and -1 in case no index is found with two non-nil values. +func (b *bow) GetNextFloat64s(colIndex1, colIndex2, rowIndex int) (float64, float64, int) { for rowIndex >= 0 && rowIndex < b.NumRows() { - value, ok := b.GetFloat64(colIndex, rowIndex) - if ok { - return value, rowIndex + var v1 float64 + v1, rowIndex = b.GetNextFloat64(colIndex1, rowIndex) + v2, rowIndex2 := b.GetNextFloat64(colIndex2, rowIndex) + if rowIndex == rowIndex2 { + return v1, v2, rowIndex } - rowIndex-- + rowIndex++ } - return 0., -1 + return 0., 0., -1 } +// ColumnType returns the Bow type from the column `colIndex`. func (b *bow) ColumnType(colIndex int) Type { return getBowTypeFromArrowType(b.Schema().Field(colIndex).Type) } +// ColumnIndex returns the index of the column with the name `colName`, and an error. +// Return an error if the column doesn't exist or if several columns have this name. func (b *bow) ColumnIndex(colName string) (int, error) { colIndices := b.Schema().FieldIndices(colName) if len(colIndices) == 0 { @@ -271,7 +330,7 @@ func (b *bow) ColumnIndex(colName string) (int, error) { return colIndices[0], nil } -// Distinct returns all non nil different values found in a column in a new Bow +// Distinct returns all non-nil different values found in the column `colIndex` in a new Bow. func (b *bow) Distinct(colIndex int) Bow { hitMap := make(map[interface{}]struct{}) for i := 0; i < b.NumRows(); i++ { @@ -280,6 +339,7 @@ func (b *bow) Distinct(colIndex int) Bow { hitMap[val] = struct{}{} } } + buf := NewBuffer(len(hitMap), b.ColumnType(colIndex)) i := 0 for k := range hitMap { @@ -293,5 +353,6 @@ func (b *bow) Distinct(colIndex int) Bow { if err != nil { panic(err) } + return res } diff --git a/bowjoin.go b/bowjoin.go index aaa96ec..0cdd581 100644 --- a/bowjoin.go +++ b/bowjoin.go @@ -11,7 +11,7 @@ func (b *bow) InnerJoin(other Bow) Bow { left := b right, ok := other.(*bow) if !ok { - panic("bow.InnerJoin: non bow object passed as argument") + panic("non bow object passed as argument") } if left.NumCols() == 0 && right.NumCols() == 0 { @@ -53,7 +53,7 @@ func (b *bow) InnerJoin(other Bow) Bow { NewMetadata(keys, values), newSeries...) if err != nil { - panic(fmt.Errorf("bow.InnerJoin: %w", err)) + panic(err) } return newBow @@ -65,7 +65,7 @@ func (b *bow) OuterJoin(other Bow) Bow { left := b right, ok := other.(*bow) if !ok { - panic("bow.OuterJoin: non bow object passed as argument") + panic("non bow object passed as argument") } // Get common columns indices @@ -116,7 +116,7 @@ func (b *bow) OuterJoin(other Bow) Bow { NewMetadata(keys, values), newSeries...) if err != nil { - panic(fmt.Errorf("bow.OuterJoin: %w", err)) + panic(err) } return newBow @@ -133,14 +133,14 @@ func getCommonCols(left, right Bow) map[string][]Buffer { if len(rFields) > 1 { panic(fmt.Errorf( - "bow.Join: too many columns have the same name: right:%+v left:%+v", + "too many columns have the same name: right:%+v left:%+v", right.String(), left.String())) } rField := rFields[0] if rField.Type.ID() != lField.Type.ID() { panic(fmt.Errorf( - "bow.Join: left and right bow on join columns are of incompatible types: %s", + "left and right bow on join columns are of incompatible types: %s", lField.Name)) } diff --git a/bowjoin_test.go b/bowjoin_test.go index d876cb8..b9ff019 100644 --- a/bowjoin_test.go +++ b/bowjoin_test.go @@ -352,9 +352,9 @@ func TestBow_OuterJoin(t *testing.T) { require.NoError(t, err) defer func() { if r := recover(); r == nil || - r.(error).Error() != "bow.Join: left and right bow on join columns are of incompatible types: index1" { + r.(error).Error() != "left and right bow on join columns are of incompatible types: index1" { t.Errorf("indexes of b1 and b2 are incompatible and should panic. Have %v, expect %v", - r, "bow Join: left and right bow on join columns are of incompatible types: index1") + r, "left and right bow on join columns are of incompatible types: index1") } }() b1.OuterJoin(b2) @@ -570,9 +570,9 @@ func TestBow_InnerJoin(t *testing.T) { defer func() { if r := recover(); r == nil || - r.(error).Error() != "bow.Join: left and right bow on join columns are of incompatible types: index1" { + r.(error).Error() != "left and right bow on join columns are of incompatible types: index1" { t.Errorf("indexes of b1 and b2 are incompatible and should panic. Have %v, expect %v", - r, "bow Join: left and right bow on join columns are of incompatible types: index1") + r, "left and right bow on join columns are of incompatible types: index1") } }() b1.InnerJoin(b2) diff --git a/bowjson.go b/bowjson.go index ba128cd..1ff1145 100644 --- a/bowjson.go +++ b/bowjson.go @@ -2,7 +2,6 @@ package bow import ( "encoding/json" - "fmt" ) type jsonField struct { @@ -14,23 +13,27 @@ type JSONSchema struct { Fields []jsonField `json:"fields"` } +// JSONBow is a structure representing a Bow for JSON marshaling purpose. type JSONBow struct { Schema JSONSchema `json:"schema"` RowBasedData []map[string]interface{} `json:"data"` } +// MarshalJSON returns the marshal encoding of the bow. func (b bow) MarshalJSON() ([]byte, error) { return json.Marshal(NewJSONBow(&b)) } -func NewJSONBow(b Bow) (res JSONBow) { +// NewJSONBow returns a new JSONBow structure from a Bow. +func NewJSONBow(b Bow) JSONBow { if b == nil { - return + return JSONBow{} } - res = JSONBow{ + res := JSONBow{ RowBasedData: make([]map[string]interface{}, 0, b.NumRows()), } + for _, col := range b.Schema().Fields() { res.Schema.Fields = append( res.Schema.Fields, @@ -46,24 +49,26 @@ func NewJSONBow(b Bow) (res JSONBow) { } res.RowBasedData = append(res.RowBasedData, row) } - return + + return res } +// UnmarshalJSON parses the JSON-encoded data and stores the result in the bow. func (b *bow) UnmarshalJSON(data []byte) error { jsonB := JSONBow{} if err := json.Unmarshal(data, &jsonB); err != nil { - return fmt.Errorf("bow.UnmarshalJSON: %w", err) + return err } if err := b.NewValuesFromJSON(jsonB); err != nil { - return fmt.Errorf("bow.UnmarshalJSON: %w", err) + return err } return nil } -// NewValuesFromJSON replaces b values by a filled JSONBow struct +// NewValuesFromJSON replaces the bow arrow.Record by a new one represented by the JSONBow structure. func (b *bow) NewValuesFromJSON(jsonB JSONBow) error { if len(jsonB.Schema.Fields) == 0 { b.Record = NewBowEmpty().(*bow).Record @@ -102,18 +107,18 @@ func (b *bow) NewValuesFromJSON(jsonB JSONBow) error { } } - seriesSlice := make([]Series, len(jsonB.Schema.Fields)) + series := make([]Series, len(jsonB.Schema.Fields)) if jsonB.RowBasedData == nil { for i, field := range jsonB.Schema.Fields { typ := getBowTypeFromArrowName(field.Type) buf := NewBuffer(0, typ) - seriesSlice[i] = NewSeriesFromBuffer(field.Name, buf) + series[i] = NewSeriesFromBuffer(field.Name, buf) } - tmpBow, err := NewBow(seriesSlice...) + tmpBow, err := NewBow(series...) if err != nil { - return fmt.Errorf("bow.NewValuesFromJSON: %w", err) + return err } b.Record = tmpBow.(*bow).Record @@ -127,12 +132,12 @@ func (b *bow) NewValuesFromJSON(jsonB JSONBow) error { buf.SetOrDrop(rowIndex, row[field.Name]) } - seriesSlice[fieldIndex] = NewSeriesFromBuffer(field.Name, buf) + series[fieldIndex] = NewSeriesFromBuffer(field.Name, buf) } - tmpBow, err := NewBow(seriesSlice...) + tmpBow, err := NewBow(series...) if err != nil { - return fmt.Errorf("bow.NewValuesFromJSON: %w", err) + return err } b.Record = tmpBow.(*bow).Record diff --git a/bowmetadata.go b/bowmetadata.go index ac0b18e..1cac129 100644 --- a/bowmetadata.go +++ b/bowmetadata.go @@ -7,54 +7,44 @@ import ( "github.com/apache/arrow/go/v7/arrow/array" ) -// Metadata is an arrow metadata wrapping -// often used to reference information about indexes, sort, units... -// Metadata is mutable but copied at creation and at assignation in arrow schema -// Recommended usage is to settle on a key for your business and marshall / unmarshall with json for instance -// to enrich the value always for the same key. -// You can find example of usage by reading parquet or reading arrow file issued by panda in python. -// Consider "bow" key as reserved for the future. +// Metadata is wrapping arrow.Metadata. type Metadata struct { arrow.Metadata } +// NewMetadata returns a new Metadata. func NewMetadata(keys, values []string) Metadata { return Metadata{arrow.NewMetadata(keys, values)} } -func NewMetadataFromMap(m map[string]string) Metadata { - return Metadata{arrow.MetadataFrom(m)} -} - +// NewBowWithMetadata returns a new Bow from Metadata and Series. func NewBowWithMetadata(metadata Metadata, series ...Series) (Bow, error) { rec, err := newRecord(metadata, series...) if err != nil { - return nil, fmt.Errorf("bow.NewBowWithMetadata: %w", err) + return nil, fmt.Errorf("newRecord: %w", err) } return &bow{Record: rec}, nil } -// Metadata return a copy of schema metadata. +// Metadata return a copy of the bow Schema Metadata. func (b *bow) Metadata() Metadata { return NewMetadata( b.Schema().Metadata().Keys(), b.Schema().Metadata().Values()) } -// SetMetadata Set a value for a given key and return a Bow with freshly created metadata +// SetMetadata sets a value for a given key and return a Bow with freshly created Metadata. func (b *bow) SetMetadata(key, value string) Bow { - metadata := b.Metadata() - metadata = metadata.Set(key, value) + m := b.Metadata() + m = m.Set(key, value) return &bow{Record: array.NewRecord( - arrow.NewSchema(b.Schema().Fields(), &metadata.Metadata), + arrow.NewSchema(b.Schema().Fields(), &m.Metadata), b.Columns(), b.Record.NumRows())} } -// WithMetadata completely replace original Metadata -// Use with caution to avoid information loss for metadata issued by other sources -// A copy is assigned, so you can still mutate metadata given as parameter +// WithMetadata replaces the bow original Metadata. func (b *bow) WithMetadata(metadata Metadata) Bow { m := arrow.NewMetadata(metadata.Keys(), metadata.Values()) return &bow{Record: array.NewRecord( @@ -63,41 +53,45 @@ func (b *bow) WithMetadata(metadata Metadata) Bow { b.Record.NumRows())} } -// Set mutate the Metadata in case key already exists and return a fresh copy -// with given key and value assigned -func (md *Metadata) Set(key, value string) Metadata { - srcKeys := md.Keys() - srcValues := md.Values() - srcKeyIdx := md.FindKey(key) - if srcKeyIdx == -1 { - srcKeys = append(srcKeys, key) - srcValues = append(srcValues, value) +// Set returns a new Metadata with the key/value pair set. +// If the key already exists, it replaces its value. +func (m *Metadata) Set(newKey, newValue string) Metadata { + keys := m.Keys() + values := m.Values() + keyIndex := m.FindKey(newKey) + + if keyIndex == -1 { + keys = append(keys, newKey) + values = append(values, newValue) } else { - srcValues[srcKeyIdx] = value + values[keyIndex] = newValue } - return Metadata{arrow.NewMetadata(srcKeys, srcValues)} + + return Metadata{arrow.NewMetadata(keys, values)} } -// SetMany mutate the Metadata in case key already exists and return a fresh copy -// with given keys and values assigned -func (md *Metadata) SetMany(keys, values []string) Metadata { - if len(keys) != len(values) { +// SetMany returns a new Metadata with the key/value pairs set. +// If a key already exists, it replaces its value. +func (m *Metadata) SetMany(newKeys, newValues []string) Metadata { + if len(newKeys) != len(newValues) { panic("metadata len mismatch") } - if len(keys) == 0 { - return *md + if len(newKeys) == 0 { + return *m } - srcKeys := md.Keys() - srcValues := md.Values() - for i, key := range keys { - srcKeyIdx := md.FindKey(key) - if srcKeyIdx == -1 { - srcKeys = append(srcKeys, key) - srcValues = append(srcValues, values[i]) + keys := m.Keys() + values := m.Values() + + for i, newKey := range newKeys { + newKeyIndex := m.FindKey(newKey) + if newKeyIndex == -1 { + keys = append(keys, newKey) + values = append(values, newValues[i]) } else { - srcValues[srcKeyIdx] = values[i] + values[newKeyIndex] = newValues[i] } } - return Metadata{arrow.NewMetadata(srcKeys, srcValues)} + + return Metadata{arrow.NewMetadata(keys, values)} } diff --git a/bowparquet.go b/bowparquet.go index 7e10a9b..266d8b3 100644 --- a/bowparquet.go +++ b/bowparquet.go @@ -38,7 +38,7 @@ type parquetColTypesMeta struct { LogicalType *parquet.LogicalType `json:"logical_type"` } -// NewBowFromParquet loads a parquet object from the file path, returning a new Bow +// NewBowFromParquet loads a parquet object from the file path, returning a new Bow. // Only value columns are used to create the new Bow. // Argument verbose is used to print information about the file loaded. func NewBowFromParquet(path string, verbose bool) (Bow, error) { diff --git a/bowrecord.go b/bowrecord.go index 188a8ec..0ddb985 100644 --- a/bowrecord.go +++ b/bowrecord.go @@ -8,15 +8,6 @@ import ( "github.com/apache/arrow/go/v7/arrow/array" ) -func NewBowFromRecord(record arrow.Record) (Bow, error) { - for _, f := range record.Schema().Fields() { - if getBowTypeFromArrowType(f.Type) == Unknown { - return nil, fmt.Errorf("unsupported type: %s", f.Type.Name()) - } - } - return &bow{Record: record}, nil -} - func newRecord(metadata Metadata, series ...Series) (arrow.Record, error) { var fields []arrow.Field var arrays []arrow.Array diff --git a/bowseries.gen.go b/bowseries.gen.go index f670ec0..396af85 100644 --- a/bowseries.gen.go +++ b/bowseries.gen.go @@ -10,6 +10,12 @@ import ( "github.com/apache/arrow/go/v7/arrow/memory" ) +// NewSeries returns a new Series from: +// - name: string +// - dataArray: slice of the data in any of the Bow supported types +// - validityArray: +// - If nil, the data will be non-nil +// - Can be of type []bool or []byte to represent nil values func NewSeries(name string, dataArray interface{}, validityArray interface{}) Series { switch v := dataArray.(type) { case []int64: @@ -25,6 +31,7 @@ func NewSeries(name string, dataArray interface{}, validityArray interface{}) Se } } +// NewSeriesFromBuffer returns a new Series from a name and a Buffer. func NewSeriesFromBuffer(name string, buf Buffer) Series { switch data := buf.Data.(type) { case []int64: @@ -40,10 +47,58 @@ func NewSeriesFromBuffer(name string, buf Buffer) Series { } } -func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series { +func newInt64Series(name string, data []int64, valid []byte) Series { + length := len(data) + return Series{ + Name: name, + Array: array.NewInt64Data( + array.NewData(mapBowToArrowTypes[Int64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(data)), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), + } +} + +func newFloat64Series(name string, data []float64, valid []byte) Series { + length := len(data) + return Series{ + Name: name, + Array: array.NewFloat64Data( + array.NewData(mapBowToArrowTypes[Float64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(data)), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), + } +} + +func newBooleanSeries(name string, data []bool, valid []byte) Series { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewBooleanBuilder(mem) + defer builder.Release() + builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) + return Series{Name: name, Array: builder.NewArray()} +} + +func newStringSeries(name string, data []string, valid []byte) Series { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewStringBuilder(mem) + defer builder.Release() + builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) + return Series{Name: name, Array: builder.NewArray()} +} + +// NewSeriesFromInterfaces returns a new Series from: +// - name: string +// - typ: Bow Type +// - data: represented by an slice of interface{}, with eventually nil values +func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { if typ == Unknown { var err error - if typ, err = seekType(cells); err != nil { + if typ, err = getBowTypeFromInterfaces(data); err != nil { panic(err) } } @@ -52,9 +107,9 @@ func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series case Int64: builder := array.NewInt64Builder(mem) defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToInt64(cells[i]) + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToInt64(data[i]) if !ok { builder.AppendNull() continue @@ -65,9 +120,9 @@ func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series case Float64: builder := array.NewFloat64Builder(mem) defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToFloat64(cells[i]) + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToFloat64(data[i]) if !ok { builder.AppendNull() continue @@ -78,9 +133,9 @@ func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series case Boolean: builder := array.NewBooleanBuilder(mem) defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToBoolean(cells[i]) + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToBoolean(data[i]) if !ok { builder.AppendNull() continue @@ -91,9 +146,9 @@ func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series case String: builder := array.NewStringBuilder(mem) defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToString(cells[i]) + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToString(data[i]) if !ok { builder.AppendNull() continue @@ -105,47 +160,3 @@ func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series panic(fmt.Errorf("unhandled type %s", typ)) } } - -func newInt64Series(name string, data []int64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewInt64Data( - array.NewData(mapBowToArrowTypes[Int64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), - } -} - -func newFloat64Series(name string, data []float64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewFloat64Data( - array.NewData(mapBowToArrowTypes[Float64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), - } -} - -func newBooleanSeries(name string, data []bool, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewBooleanBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} -} - -func newStringSeries(name string, data []string, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewStringBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} -} diff --git a/bowseries.gen.go.tmpl b/bowseries.gen.go.tmpl index c6496b3..f68fd32 100644 --- a/bowseries.gen.go.tmpl +++ b/bowseries.gen.go.tmpl @@ -8,6 +8,12 @@ import ( "github.com/apache/arrow/go/v7/arrow/memory" ) +// NewSeries returns a new Series from: +// - name: string +// - dataArray: slice of the data in any of the Bow supported types +// - validityArray: +// - If nil, the data will be non-nil +// - Can be of type []bool or []byte to represent nil values func NewSeries(name string, dataArray interface{}, validityArray interface{}) Series { switch v := dataArray.(type) { {{range .Data.types -}} @@ -19,6 +25,7 @@ func NewSeries(name string, dataArray interface{}, validityArray interface{}) Se } } +// NewSeriesFromBuffer returns a new Series from a name and a Buffer. func NewSeriesFromBuffer(name string, buf Buffer) Series { switch data := buf.Data.(type) { {{range .Data.types -}} @@ -30,35 +37,6 @@ func NewSeriesFromBuffer(name string, buf Buffer) Series { } } -func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series { - if typ == Unknown { - var err error - if typ, err = seekType(cells); err != nil { - panic(err) - } - } - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - switch typ { - {{range .Data.types -}} - case {{ .Type }}: - builder := array.New{{ .Type }}Builder(mem) - defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := To{{ .Type }}(cells[i]) - if !ok { - builder.AppendNull() - continue - } - builder.Append(v) - } - return Series{Name: name, Array: builder.NewArray()} - {{end -}} - default: - panic(fmt.Errorf("unhandled type %s", typ)) - } -} - {{range .Data.types -}} func new{{ .Type }}Series(name string, data []{{ .type }}, valid []byte) Series { {{ if .bufferMemoryMappingEqualArrow -}} @@ -82,4 +60,37 @@ func new{{ .Type }}Series(name string, data []{{ .type }}, valid []byte) Series {{ end -}} } -{{end -}} \ No newline at end of file +{{end -}} + +// NewSeriesFromInterfaces returns a new Series from: +// - name: string +// - typ: Bow Type +// - data: represented by an slice of interface{}, with eventually nil values +func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { + if typ == Unknown { + var err error + if typ, err = getBowTypeFromInterfaces(data); err != nil { + panic(err) + } + } + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + switch typ { + {{range .Data.types -}} + case {{ .Type }}: + builder := array.New{{ .Type }}Builder(mem) + defer builder.Release() + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := To{{ .Type }}(data[i]) + if !ok { + builder.AppendNull() + continue + } + builder.Append(v) + } + return Series{Name: name, Array: builder.NewArray()} + {{end -}} + default: + panic(fmt.Errorf("unhandled type %s", typ)) + } +} \ No newline at end of file diff --git a/bowseries.gen_test.go b/bowseries.gen_test.go index f130914..b7af97b 100644 --- a/bowseries.gen_test.go +++ b/bowseries.gen_test.go @@ -7,7 +7,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestNewSeriesFromInterfaces(t *testing.T) { +func TestNewSeriesFromColBasedInterfaces(t *testing.T) { for _, typ := range allType { t.Run(typ.String(), func(t *testing.T) { testcase := []interface{}{typ.Convert(0), nil} diff --git a/bowseries.go b/bowseries.go index c40f89d..fdf573b 100644 --- a/bowseries.go +++ b/bowseries.go @@ -8,7 +8,8 @@ import ( "github.com/apache/arrow/go/v7/arrow" ) -// A Series is simply a named Apache Arrow array.Interface, which is immutable +// Series is wrapping the Apache Arrow arrow.Array interface, with the addition of a name. +// It represents an immutable sequence of values using the Arrow in-memory format. type Series struct { Name string Array arrow.Array @@ -39,8 +40,8 @@ func buildNullBitmapBool(dataLength int, validityArray interface{}) []bool { } } -func seekType(cells []interface{}) (Type, error) { - for _, val := range cells { +func getBowTypeFromInterfaces(colBasedData []interface{}) (Type, error) { + for _, val := range colBasedData { if val != nil { switch val.(type) { case float64, json.Number: diff --git a/bowsetters.go b/bowsetters.go index 21aaa6b..c1cee70 100644 --- a/bowsetters.go +++ b/bowsetters.go @@ -4,48 +4,49 @@ import ( "fmt" ) +// RenameCol returns a new Bow with the column `colIndex` renamed. func (b *bow) RenameCol(colIndex int, newName string) (Bow, error) { if colIndex >= b.NumCols() { - return nil, fmt.Errorf("bow.RenameCol: column index out of bound") + return nil, fmt.Errorf("column index out of bound") } if newName == "" { - return nil, fmt.Errorf("bow.RenameCol: newName cannot be empty") + return nil, fmt.Errorf("newName cannot be empty") } - seriesSlice := make([]Series, b.NumCols()) + series := make([]Series, b.NumCols()) for i, col := range b.Columns() { if i == colIndex { - seriesSlice[i] = Series{ + series[i] = Series{ Name: newName, Array: col, } } else { - seriesSlice[i] = b.NewSeriesFromCol(i) + series[i] = b.NewSeriesFromCol(i) } } - return NewBowWithMetadata(b.Metadata(), seriesSlice...) + return NewBowWithMetadata(b.Metadata(), series...) } -// Apply uses the given function to transform a column into something else, -// its expected return type has to be supported otherwise given results will be stored as null +// Apply uses the given function to transform the values of column `colIndex`. +// Its expected return type has to be supported otherwise given results will be stored as nil values. func (b *bow) Apply(colIndex int, returnType Type, fn func(interface{}) interface{}) (Bow, error) { buf := NewBuffer(b.NumRows(), returnType) for i := 0; i < b.NumRows(); i++ { buf.SetOrDropStrict(i, fn(b.GetValue(colIndex, i))) } - seriesSlice := make([]Series, b.NumCols()) + series := make([]Series, b.NumCols()) for i := range b.Columns() { if i == colIndex { - seriesSlice[i] = NewSeriesFromBuffer(b.ColumnName(colIndex), buf) + series[i] = NewSeriesFromBuffer(b.ColumnName(colIndex), buf) } else { - seriesSlice[i] = b.NewSeriesFromCol(i) + series[i] = b.NewSeriesFromCol(i) } } - return NewBowWithMetadata(b.Metadata(), seriesSlice...) + return NewBowWithMetadata(b.Metadata(), series...) } // Convert transforms a column type into another, @@ -65,7 +66,7 @@ type RowCmp func(b Bow, i int) bool func (b *bow) Filter(fns ...RowCmp) Bow { var indices []int for i := 0; i < b.NumRows(); i++ { - if matchRowComps(b, i, fns...) { + if matchRowCmps(b, i, fns...) { indices = append(indices, i) } } @@ -73,7 +74,8 @@ func (b *bow) Filter(fns ...RowCmp) Bow { if len(indices) == 0 { return b.NewEmptySlice() } - // if all indices are concomitant, slicing is more performent than copying + + // If all indices are concomitant, slicing is more performent than copying lastInclusive := indices[len(indices)-1] + 1 if len(indices) == lastInclusive-indices[0] { return b.NewSlice(indices[0], lastInclusive) @@ -87,30 +89,33 @@ func (b *bow) Filter(fns ...RowCmp) Bow { } filteredSeries[colIndex] = NewSeriesFromBuffer(b.ColumnName(colIndex), buf) } + res, err := NewBowWithMetadata(b.Metadata(), filteredSeries...) if err != nil { panic(err) } + return res } -func matchRowComps(b Bow, i int, fns ...RowCmp) bool { +func matchRowCmps(b Bow, i int, fns ...RowCmp) bool { for _, fn := range fns { if !fn(b, i) { return false } } + return true } -// MakeFilterValues prepares a valid comparator for Filter, it is lazy on given type +// MakeFilterValues prepares a valid comparator for Filter, it is lazy on given type. // Be careful about number to string though, for instance 0.1 give "0.100000", which could be unexpected // If value is of the wrong type and not convertible to column type, comparison will be done on null values! func (b *bow) MakeFilterValues(colIndex int, values ...interface{}) RowCmp { - t := b.ColumnType(colIndex) for i := range values { - values[i] = t.Convert(values[i]) + values[i] = b.ColumnType(colIndex).Convert(values[i]) } + return func(b Bow, i int) bool { return contains(values, b.GetValue(colIndex, i)) } @@ -122,5 +127,6 @@ func contains(values []interface{}, value interface{}) bool { return true } } + return false } diff --git a/bowsort.go b/bowsort.go index 5fed9f4..be9d0bc 100644 --- a/bowsort.go +++ b/bowsort.go @@ -6,12 +6,11 @@ import ( ) // SortByCol returns a new Bow with the rows sorted by a column in ascending order. -// The only type currently supported for the column to sort by is Int64, without nil values. // Returns the same Bow if the column is already sorted. func (b *bow) SortByCol(colIndex int) (Bow, error) { if b.Column(colIndex).NullN() != 0 { return nil, fmt.Errorf( - "bow.SortByCol: column to sort by has %d nil values", + "column to sort by has %d nil values", b.Column(colIndex).NullN()) } @@ -32,8 +31,8 @@ func (b *bow) SortByCol(colIndex int) (Bow, error) { continue } buf := NewBuffer(b.NumRows(), b.ColumnType(i)) - for j, indice := range sortableBuf.indices { - buf.SetOrDropStrict(j, b.GetValue(i, indice)) + for j, index := range sortableBuf.indices { + buf.SetOrDropStrict(j, b.GetValue(i, index)) } sortedSeries[i] = NewSeriesFromBuffer(b.ColumnName(i), buf) } @@ -41,8 +40,7 @@ func (b *bow) SortByCol(colIndex int) (Bow, error) { return NewBowWithMetadata(b.Metadata(), sortedSeries...) } -// Int64Slice implements the methods of sort.Interface, sorting in increasing order -// (not-a-number values are treated as less than other values). +// bufferWithIndices implements the methods of sort.Interface, sorting in ascending order. type bufferWithIndices struct { Buffer indices []int diff --git a/bowstring.go b/bowstring.go index 621a3c5..3ce4bba 100644 --- a/bowstring.go +++ b/bowstring.go @@ -6,10 +6,12 @@ import ( "text/tabwriter" ) +// String returns a formatted representation of the Bow. func (b *bow) String() string { if b.NumCols() == 0 { return "" } + w := new(tabwriter.Writer) writer := new(strings.Builder) // tabs will be replaced by two spaces by formatter @@ -46,7 +48,7 @@ func (b *bow) String() string { } // Flush buffer and format lines along the way - if err := w.Flush(); err != nil { + if err = w.Flush(); err != nil { panic(err) } diff --git a/bowtypes.go b/bowtypes.go index bfb0362..a42410f 100644 --- a/bowtypes.go +++ b/bowtypes.go @@ -13,22 +13,19 @@ type Type int // - complete GetValue bow method const ( - // Unknown is placed first to be by default - // when allocating Type or []Type + // Unknown is placed first to be the default when allocating Type or []Type. Unknown = Type(iota) - // Float64 and following types are native arrow type supported by bow + // Float64 and following types are native arrow type supported by bow. Float64 Int64 Boolean String - // InputDependent is used in transformation like aggregation - // when output type is infer with input type + // InputDependent is used in aggregations when the output type is dependent on the input type. InputDependent - // IteratorDependent is used in transformation like aggregation - // when output type is infer with iteratorType + // IteratorDependent is used in aggregations when the output type is dependent on the iterator type. IteratorDependent ) @@ -62,36 +59,39 @@ var ( }() ) +// ArrowType returns the arrow.DataType from the Bow Type. func (t Type) ArrowType() arrow.DataType { return mapBowToArrowTypes[t] } -func (t Type) Convert(i interface{}) interface{} { - var val interface{} +// Convert attempts to convert the `input` value to the Type t. +// Returns nil if it fails. +func (t Type) Convert(input interface{}) interface{} { + var output interface{} var ok bool switch t { case Float64: - val, ok = ToFloat64(i) + output, ok = ToFloat64(input) case Int64: - val, ok = ToInt64(i) + output, ok = ToInt64(input) case Boolean: - val, ok = ToBoolean(i) + output, ok = ToBoolean(input) case String: - val, ok = ToString(i) + output, ok = ToString(input) } if ok { - return val + return output } return nil } -// IsSupported ensures that the type is currently supported by Bow -// and match a convertible concrete type. +// IsSupported ensures that the Type t is currently supported by Bow and matches a convertible concrete type. func (t Type) IsSupported() bool { _, ok := mapBowToArrowTypes[t] return ok } +// String returns the string representation of the Type t. func (t Type) String() string { at, ok := mapBowToArrowTypes[t] if !ok { @@ -116,6 +116,7 @@ func getBowTypeFromArrowType(arrowType arrow.DataType) Type { return typ } +// GetAllTypes returns all Bow types. func GetAllTypes() []Type { res := make([]Type, len(allType)) copy(res, allType) diff --git a/rolling/aggregation.go b/rolling/aggregation.go index aa5c717..cb483cc 100644 --- a/rolling/aggregation.go +++ b/rolling/aggregation.go @@ -4,146 +4,154 @@ import ( "fmt" "github.com/metronlab/bow" - "github.com/metronlab/bow/transform" + "github.com/metronlab/bow/rolling/transformation" ) +// ColAggregation is a set of methods to aggregate and transform a Window. type ColAggregation interface { + // InputName returns the name of the input column. InputName() string + // InputIndex returns the index of the input column. InputIndex() int - MutateInputIndex(int) + // SetInputIndex sets the index of the input column. + SetInputIndex(int) + // OutputName returns the name of the output column. OutputName() string - Rename(string) ColAggregation - NeedInclusive() bool + // RenameOutput returns a copy of the ColAggregation with a new output column name. + RenameOutput(string) ColAggregation + // NeedInclusiveWindow returns true if the ColAggregation needs to have inclusive windows. + NeedInclusiveWindow() bool + // Type returns the return type of the ColAggregation. Type() bow.Type + // GetReturnType returns the return type of the ColAggregation depending on an input and an iterator type. + GetReturnType(inputType, iteratorType bow.Type) bow.Type + + // Func returns the ColAggregationFunc of the ColAggregation. Func() ColAggregationFunc - Transform(...transform.Transform) ColAggregation - Transforms() []transform.Transform - GetReturnType(inputType bow.Type, iterator bow.Type) bow.Type + + // Transformations returns the transformation functions of the ColAggregation. + Transformations() []transformation.Func + // SetTransformations returns a copy of the ColAggregation with new transformations functions. + SetTransformations(...transformation.Func) ColAggregation } type colAggregation struct { - inputName string - inputIndex int - inclusiveWindow bool + inputName string + inputIndex int + needInclusiveWindow bool - fn ColAggregationFunc - transforms []transform.Transform + aggregationFn ColAggregationFunc + transformationFns []transformation.Func outputName string typ bow.Type } -func NewColAggregation(colName string, inclusiveWindow bool, returnedType bow.Type, fn ColAggregationFunc) ColAggregation { +// NewColAggregation returns a new ColAggregation. +func NewColAggregation(inputName string, needInclusiveWindow bool, typ bow.Type, fn ColAggregationFunc) ColAggregation { return &colAggregation{ - inputName: colName, - inputIndex: -1, - inclusiveWindow: inclusiveWindow, - fn: fn, - typ: returnedType, + inputName: inputName, + inputIndex: -1, + needInclusiveWindow: needInclusiveWindow, + aggregationFn: fn, + typ: typ, } } type ColAggregationConstruct func(colName string) ColAggregation type ColAggregationFunc func(colIndex int, w Window) (interface{}, error) -func (a *colAggregation) GetReturnType(input, iterator bow.Type) (typ bow.Type) { - switch a.Type() { - case bow.Int64, bow.Float64, bow.Boolean, bow.String: - typ = a.Type() - case bow.InputDependent: - typ = input - case bow.IteratorDependent: - typ = iterator - default: - panic(fmt.Errorf("invalid return type %v", a.Type())) - } - return +func (a *colAggregation) InputName() string { + return a.inputName } func (a *colAggregation) InputIndex() int { return a.inputIndex } -func (a *colAggregation) InputName() string { - return a.inputName -} - -func (a *colAggregation) MutateInputIndex(i int) { +func (a *colAggregation) SetInputIndex(i int) { a.inputIndex = i } -func (a *colAggregation) Type() bow.Type { - return a.typ +func (a *colAggregation) OutputName() string { + return a.outputName } -func (a *colAggregation) Func() ColAggregationFunc { - return a.fn +func (a *colAggregation) RenameOutput(name string) ColAggregation { + aCopy := *a + aCopy.outputName = name + return &aCopy } -func (a *colAggregation) Transform(transforms ...transform.Transform) ColAggregation { - a2 := *a - a2.transforms = transforms - return &a2 +func (a *colAggregation) NeedInclusiveWindow() bool { + return a.needInclusiveWindow } -func (a *colAggregation) Transforms() []transform.Transform { - return a.transforms +func (a *colAggregation) Type() bow.Type { + return a.typ } -func (a *colAggregation) OutputName() string { - return a.outputName +func (a *colAggregation) Func() ColAggregationFunc { + return a.aggregationFn } -func (a *colAggregation) Rename(name string) ColAggregation { - a2 := *a - a2.outputName = name - return &a2 +func (a *colAggregation) Transformations() []transformation.Func { + return a.transformationFns } -func (a *colAggregation) NeedInclusive() bool { - return a.inclusiveWindow +func (a *colAggregation) SetTransformations(transformations ...transformation.Func) ColAggregation { + aCopy := *a + aCopy.transformationFns = transformations + return &aCopy } -// Aggregate each column using a ColAggregation -func (it *intervalRollingIter) Aggregate(aggrs ...ColAggregation) Rolling { - if it.err != nil { - return it +func (a *colAggregation) GetReturnType(inputType, iteratorType bow.Type) bow.Type { + switch a.Type() { + case bow.Int64, bow.Float64, bow.Boolean, bow.String: + return a.Type() + case bow.InputDependent: + return inputType + case bow.IteratorDependent: + return iteratorType + default: + panic(fmt.Errorf("invalid return type %v", a.Type())) } +} - itCopy := *it - newIntervalCol, aggrs, err := itCopy.indexedAggregations(aggrs) - if err != nil { - return itCopy.setError(fmt.Errorf("rolling.Aggregate error: %w", err)) +func (r *intervalRolling) Aggregate(aggrs ...ColAggregation) Rolling { + if r.err != nil { + return r } - seriesSlice, err := itCopy.aggregateWindows(aggrs) + rCopy := *r + newIntervalCol, aggrs, err := rCopy.indexedAggregations(aggrs) if err != nil { - return itCopy.setError(fmt.Errorf("rolling.Aggregate error: %w", err)) + return rCopy.setError(fmt.Errorf("intervalRolling.indexedAggregations: %w", err)) } - b, err := bow.NewBow(seriesSlice...) + b, err := rCopy.aggregateWindows(aggrs) if err != nil { - return itCopy.setError(fmt.Errorf("rolling.Aggregate error: %w", err)) + return rCopy.setError(fmt.Errorf("intervalRolling.aggregateWindows: %w", err)) } - itNew, err := IntervalRollingForIndex(b, newIntervalCol, itCopy.interval, itCopy.options) + newR, err := newIntervalRolling(b, newIntervalCol, rCopy.interval, rCopy.options) if err != nil { - return itCopy.setError(fmt.Errorf("rolling.Aggregate error: %w", err)) + return rCopy.setError(fmt.Errorf("newIntervalRolling: %w", err)) } - return itNew + return newR } -func (it *intervalRollingIter) indexedAggregations(aggrs []ColAggregation) (int, []ColAggregation, error) { +func (r *intervalRolling) indexedAggregations(aggrs []ColAggregation) (int, []ColAggregation, error) { if len(aggrs) == 0 { return -1, nil, fmt.Errorf("at least one column aggregation is required") } newIntervalCol := -1 for i := range aggrs { - isInterval, err := it.validateAggregation(aggrs[i], i) + isInterval, err := r.validateAggregation(aggrs[i], i) if err != nil { return -1, nil, err } @@ -153,96 +161,78 @@ func (it *intervalRollingIter) indexedAggregations(aggrs []ColAggregation) (int, } if newIntervalCol == -1 { - return -1, nil, fmt.Errorf("must keep interval column '%s'", it.bow.ColumnName(it.colIndex)) + return -1, nil, fmt.Errorf( + "must keep interval column '%s'", r.bow.ColumnName(r.colIndex)) } return newIntervalCol, aggrs, nil } -func (it *intervalRollingIter) validateAggregation(aggr ColAggregation, newIndex int) (isInterval bool, err error) { +func (r *intervalRolling) validateAggregation(aggr ColAggregation, newIndex int) (isInterval bool, err error) { if aggr.InputName() == "" { return false, fmt.Errorf("aggregation %d has no column name", newIndex) } - readIndex, err := it.bow.ColumnIndex(aggr.InputName()) + + readIndex, err := r.bow.ColumnIndex(aggr.InputName()) if err != nil { return false, err } - aggr.MutateInputIndex(readIndex) - if aggr.NeedInclusive() { - it.options.Inclusive = true + aggr.SetInputIndex(readIndex) + + if aggr.NeedInclusiveWindow() { + r.options.Inclusive = true } - return readIndex == it.colIndex, nil + return readIndex == r.colIndex, nil } -// For each colIndex aggregation, gives a series with each point resulting from a window aggregation. -func (it *intervalRollingIter) aggregateWindows(aggrs []ColAggregation) ([]bow.Series, error) { - seriesSlice := make([]bow.Series, len(aggrs)) +func (r *intervalRolling) aggregateWindows(aggrs []ColAggregation) (bow.Bow, error) { + series := make([]bow.Series, len(aggrs)) - for colIndex, aggregation := range aggrs { - itCopy := *it + for colIndex, aggr := range aggrs { + rCopy := *r + typ := aggr.GetReturnType( + rCopy.bow.ColumnType(aggr.InputIndex()), + rCopy.bow.ColumnType(rCopy.colIndex)) + buf := bow.NewBuffer(rCopy.numWindows, typ) - buf, err := itCopy.windowsAggregateBuffer(colIndex, aggregation) - if err != nil { - return nil, err - } - - colName := aggregation.OutputName() - if colName == "" { - colName = itCopy.bow.ColumnName(aggregation.InputIndex()) - } - - seriesSlice[colIndex] = bow.NewSeriesFromBuffer(colName, buf) - } + for rCopy.HasNext() { + winIndex, w, err := rCopy.Next() + if err != nil { + return nil, err + } - return seriesSlice, nil -} + var val interface{} + if !aggr.NeedInclusiveWindow() && w.IsInclusive { + val, err = aggr.Func()(aggr.InputIndex(), (*w).UnsetInclusive()) + } else { + val, err = aggr.Func()(aggr.InputIndex(), *w) + } + if err != nil { + return nil, err + } -func (it *intervalRollingIter) windowsAggregateBuffer(colIndex int, aggr ColAggregation) (bow.Buffer, error) { - var buf bow.Buffer + for _, trans := range aggr.Transformations() { + val, err = trans(val) + if err != nil { + return nil, err + } + } - switch aggr.Type() { - case bow.Int64, bow.Float64, bow.Boolean: - buf = bow.NewBuffer(it.numWindows, aggr.Type()) - case bow.InputDependent: - cType := it.bow.ColumnType(aggr.InputIndex()) - buf = bow.NewBuffer(it.numWindows, cType) - case bow.IteratorDependent: - iType := it.bow.ColumnType(it.colIndex) - buf = bow.NewBuffer(it.numWindows, iType) - default: - return buf, fmt.Errorf( - "aggregation %d has invalid return type %s", colIndex, aggr.Type()) - } + if val == nil { + continue + } - for it.HasNext() { - winIndex, w, err := it.Next() - if err != nil { - return buf, err + buf.SetOrDrop(winIndex, val) } - var val interface{} - if !aggr.NeedInclusive() && w.IsInclusive { - val, err = aggr.Func()(aggr.InputIndex(), (*w).UnsetInclusive()) + if aggr.OutputName() == "" { + series[colIndex] = bow.NewSeriesFromBuffer(rCopy.bow.ColumnName(aggr.InputIndex()), buf) } else { - val, err = aggr.Func()(aggr.InputIndex(), *w) + series[colIndex] = bow.NewSeriesFromBuffer(aggr.OutputName(), buf) } - if err != nil { - return buf, err - } - for _, trans := range aggr.Transforms() { - val, err = trans(val) - if err != nil { - return buf, err - } - } - if val == nil { - continue - } - - buf.SetOrDrop(winIndex, val) } - return buf, nil + return bow.NewBow(series...) } diff --git a/rolling/aggregation/XXXbenchmarks_test.go b/rolling/aggregation/XXXbenchmarks_test.go index cd6ee69..c42c8e5 100644 --- a/rolling/aggregation/XXXbenchmarks_test.go +++ b/rolling/aggregation/XXXbenchmarks_test.go @@ -68,16 +68,16 @@ func benchmarkBow(b *testing.B) { } }) - seriesSlice := make([]bow.Series, 2) + series := make([]bow.Series, 2) rand.Seed(42) - seriesSlice[0] = func(size int64) bow.Series { + series[0] = func(size int64) bow.Series { buf := bow.NewBuffer(int(size), bow.Int64) for i := int64(0); i < size; i++ { buf.SetOrDrop(int(i), i) } return bow.NewSeriesFromBuffer("time", buf) }(BenchSize) - seriesSlice[1] = func(size int64) bow.Series { + series[1] = func(size int64) bow.Series { buf := bow.NewBuffer(int(size), bow.Float64) for i := int64(0); i < size; i++ { buf.SetOrDrop(int(i), rand.Float64()) @@ -87,21 +87,21 @@ func benchmarkBow(b *testing.B) { b.Run("NewBow with validity bitmap", func(b *testing.B) { for n := 0; n < b.N; n++ { - benchBow, err = bow.NewBow(seriesSlice...) + benchBow, err = bow.NewBow(series...) require.NoError(b, err) } }) - seriesSlice = make([]bow.Series, 2) + series = make([]bow.Series, 2) rand.Seed(42) - seriesSlice[0] = func(size int64) bow.Series { + series[0] = func(size int64) bow.Series { buf := bow.NewBuffer(int(size), bow.Int64) for i := int64(0); i < size; i++ { buf.Data.([]int64)[i] = i } return bow.NewSeries("time", buf.Data, nil) }(BenchSize) - seriesSlice[1] = func(size int64) bow.Series { + series[1] = func(size int64) bow.Series { buf := bow.NewBuffer(int(size), bow.Float64) for i := int64(0); i < size; i++ { buf.Data.([]float64)[i] = rand.Float64() @@ -111,7 +111,7 @@ func benchmarkBow(b *testing.B) { b.Run("NewBow without validity bitmap", func(b *testing.B) { for n := 0; n < b.N; n++ { - benchBow, err = bow.NewBow(seriesSlice...) + benchBow, err = bow.NewBow(series...) require.NoError(b, err) } }) diff --git a/rolling/aggregation/core_test.go b/rolling/aggregation/core_test.go index 85f4c7b..804806e 100644 --- a/rolling/aggregation/core_test.go +++ b/rolling/aggregation/core_test.go @@ -6,7 +6,7 @@ import ( "github.com/metronlab/bow" "github.com/metronlab/bow/rolling" - "github.com/metronlab/bow/transform" + "github.com/metronlab/bow/rolling/transformation" "github.com/stretchr/testify/assert" ) @@ -88,7 +88,7 @@ var ( ) func runTestCases(t *testing.T, aggrConstruct rolling.ColAggregationConstruct, - aggrTransforms []transform.Transform, testCases []testCase) { + aggrTransforms []transformation.Func, testCases []testCase) { for _, testCase := range testCases { t.Run(testCase.name, func(t *testing.T) { r, err := rolling.IntervalRolling(testCase.testedBow, timeCol, 10, rolling.Options{}) @@ -96,7 +96,7 @@ func runTestCases(t *testing.T, aggrConstruct rolling.ColAggregationConstruct, aggregated, err := r. Aggregate( WindowStart(timeCol), - aggrConstruct(valueCol).Transform(aggrTransforms...)). + aggrConstruct(valueCol).SetTransformations(aggrTransforms...)). Bow() assert.NoError(t, err) assert.NotNil(t, aggregated) diff --git a/rolling/aggregation/integral.go b/rolling/aggregation/integral.go index 15d63b1..8df6c85 100644 --- a/rolling/aggregation/integral.go +++ b/rolling/aggregation/integral.go @@ -49,7 +49,7 @@ func IntegralStep(col string) rolling.ColAggregation { for rowIndex >= 0 { t1, v1, nextRowIndex := w.Bow.GetNextFloat64s(w.IntervalColIndex, colIndex, rowIndex+1) if nextRowIndex < 0 { - t1 = float64(w.End) + t1 = float64(w.LastValue) } sum += v0 * (t1 - t0) diff --git a/rolling/aggregation/integral_test.go b/rolling/aggregation/integral_test.go index 26dba20..4a04641 100644 --- a/rolling/aggregation/integral_test.go +++ b/rolling/aggregation/integral_test.go @@ -4,7 +4,7 @@ import ( "testing" "github.com/metronlab/bow" - "github.com/metronlab/bow/transform" + "github.com/metronlab/bow/rolling/transformation" "github.com/stretchr/testify/assert" ) @@ -84,7 +84,7 @@ func TestIntegralStep(t *testing.T) { func TestIntegralStep_scaled(t *testing.T) { factor := 0.1 - transforms := []transform.Transform{ + transforms := []transformation.Func{ func(x interface{}) (interface{}, error) { if x == nil { return nil, nil diff --git a/rolling/aggregation/weightedmean.go b/rolling/aggregation/weightedmean.go index d43084c..c9df69e 100644 --- a/rolling/aggregation/weightedmean.go +++ b/rolling/aggregation/weightedmean.go @@ -14,7 +14,7 @@ func WeightedAverageStep(col string) rolling.ColAggregation { return v, err } - windowsWide := float64(w.End - w.Start) + windowsWide := float64(w.LastValue - w.FirstValue) return v.(float64) / windowsWide, nil }) } @@ -28,7 +28,7 @@ func WeightedAverageLinear(col string) rolling.ColAggregation { return v, err } - windowsWide := float64(w.End - w.Start) + windowsWide := float64(w.LastValue - w.FirstValue) return v.(float64) / windowsWide, nil }) } diff --git a/rolling/aggregation/whole.go b/rolling/aggregation/whole.go new file mode 100644 index 0000000..3bec413 --- /dev/null +++ b/rolling/aggregation/whole.go @@ -0,0 +1,93 @@ +package aggregation + +import ( + "errors" + "fmt" + + "github.com/metronlab/bow" + "github.com/metronlab/bow/rolling" +) + +// Aggregate the whole dataframe on column intervalColName with one or several rolling.ColAggregation. +func Aggregate(b bow.Bow, intervalColName string, aggrs ...rolling.ColAggregation) (bow.Bow, error) { + if b == nil { + return nil, errors.New("nil bow") + } + if len(aggrs) == 0 { + return nil, errors.New("at least one column aggregation is required") + } + + intervalColIndex, err := b.ColumnIndex(intervalColName) + if err != nil { + return nil, err + } + + series := make([]bow.Series, len(aggrs)) + + for aggrIndex, aggr := range aggrs { + if aggr.InputName() == "" { + return nil, fmt.Errorf("column aggregation %d: no input name", aggrIndex) + } + + inputColIndex, err := b.ColumnIndex(aggr.InputName()) + if err != nil { + return nil, fmt.Errorf("column aggregation %d: %w", aggrIndex, err) + } + + aggr.SetInputIndex(inputColIndex) + + name := aggr.OutputName() + if name == "" { + name = b.ColumnName(aggr.InputIndex()) + } + + typ := aggr.GetReturnType( + b.ColumnType(aggr.InputIndex()), + b.ColumnType(aggr.InputIndex())) + + var buf bow.Buffer + if b.NumRows() == 0 { + buf = bow.NewBuffer(0, typ) + } else { + buf = bow.NewBuffer(1, typ) + + firstValue, firstValueIndex := b.GetNextFloat64(intervalColIndex, 0) + if firstValueIndex == -1 { + firstValue = -1 + } + + lastValue, lastValueIndex := b.GetPrevFloat64(intervalColIndex, b.NumRows()-1) + if lastValueIndex == -1 { + lastValue = -1 + } + + w := rolling.Window{ + Bow: b, + IntervalColIndex: intervalColIndex, + IsInclusive: true, + FirstIndex: 0, + FirstValue: int64(firstValue), + LastValue: int64(lastValue), + } + + aggrValue, err := aggr.Func()(aggr.InputIndex(), w) + if err != nil { + return nil, fmt.Errorf("column aggregation %d: %w", aggrIndex, err) + } + + for transIndex, trans := range aggr.Transformations() { + aggrValue, err = trans(aggrValue) + if err != nil { + return nil, fmt.Errorf("column aggregation %d: transIndex %d: %w", + aggrIndex, transIndex, err) + } + } + + buf.SetOrDropStrict(0, aggrValue) + } + + series[aggrIndex] = bow.NewSeriesFromBuffer(name, buf) + } + + return bow.NewBow(series...) +} diff --git a/rolling/aggregation/wholeaggregation_test.go b/rolling/aggregation/whole_test.go similarity index 95% rename from rolling/aggregation/wholeaggregation_test.go rename to rolling/aggregation/whole_test.go index 28b6fa1..5d2a11a 100644 --- a/rolling/aggregation/wholeaggregation_test.go +++ b/rolling/aggregation/whole_test.go @@ -86,8 +86,8 @@ func TestAggregate(t *testing.T) { {1., 2., 3.}, }) actual, err := Aggregate(b, timeCol, - WindowStart(timeCol).Rename("a"), - ArithmeticMean(valueCol).Rename("b"), + WindowStart(timeCol).RenameOutput("a"), + ArithmeticMean(valueCol).RenameOutput("b"), ) require.Nil(t, err) expected, _ := bow.NewBowFromColBasedInterfaces( @@ -134,9 +134,9 @@ func TestAggregate(t *testing.T) { {1., 2., 3.}, }) actual, err := Aggregate(b, timeCol, - ArithmeticMean(valueCol).Rename("a"), - ArithmeticMean(valueCol).Rename("b"), - ArithmeticMean(valueCol).Rename("c"), + ArithmeticMean(valueCol).RenameOutput("a"), + ArithmeticMean(valueCol).RenameOutput("b"), + ArithmeticMean(valueCol).RenameOutput("c"), ) require.Nil(t, err) expected, _ := bow.NewBowFromColBasedInterfaces( @@ -164,7 +164,7 @@ func TestAggregate(t *testing.T) { WindowStart("-"), ) require.Nil(t, actual) - require.EqualError(t, err, "aggregate on 'time': no column '-'") + require.EqualError(t, err, "column aggregation 0: no column '-'") }) t.Run("float", func(t *testing.T) { diff --git a/rolling/aggregation/wholeaggregation.go b/rolling/aggregation/wholeaggregation.go deleted file mode 100644 index f580a36..0000000 --- a/rolling/aggregation/wholeaggregation.go +++ /dev/null @@ -1,114 +0,0 @@ -package aggregation - -import ( - "fmt" - - "github.com/metronlab/bow" - "github.com/metronlab/bow/rolling" -) - -// Aggregate any column with a ColAggregation -func Aggregate(b bow.Bow, refColName string, aggrs ...rolling.ColAggregation) (bow.Bow, error) { - if b == nil { - return nil, fmt.Errorf("aggregate on '%s': nil bow", refColName) - } - if len(aggrs) == 0 { - return nil, fmt.Errorf("aggregate on '%s': at least one column aggregation is required", refColName) - } - - for i := range aggrs { - err := validateAggr(b, aggrs[i]) - if err != nil { - return nil, fmt.Errorf("aggregate on '%s': %w", refColName, err) - } - } - - refColIndex, err := b.ColumnIndex(refColName) - if err != nil { - return nil, err - } - - aggregatedBow, err := aggregateCols(b, refColIndex, aggrs) - if err != nil { - return nil, fmt.Errorf("aggregate on '%s': %w", refColName, err) - } - - return aggregatedBow, nil -} - -func validateAggr(b bow.Bow, aggr rolling.ColAggregation) error { - if aggr.InputName() == "" { - return fmt.Errorf("no column name") - } - - colIndex, err := b.ColumnIndex(aggr.InputName()) - if err != nil { - return err - } - - aggr.MutateInputIndex(colIndex) - - return nil -} - -// TODO: optimize this function with concurrency and less memory usage for accessing intervalCol data -func aggregateCols(b bow.Bow, refColIndex int, aggrs []rolling.ColAggregation) (bow.Bow, error) { - seriesSlice := make([]bow.Series, len(aggrs)) - - for writeColIndex, aggr := range aggrs { - name := aggr.OutputName() - if name == "" { - name = b.ColumnName(aggr.InputIndex()) - } - - typ := aggr.GetReturnType( - b.ColumnType(aggr.InputIndex()), - b.ColumnType(aggr.InputIndex())) - - if b.NumRows() == 0 { - buf := bow.NewBuffer(0, typ) - seriesSlice[writeColIndex] = bow.NewSeriesFromBuffer(name, buf) - continue - } - - buf := bow.NewBuffer(1, typ) - - firstIndex := -1 - if b.NumRows() > 0 { - firstIndex = 0 - } - start, startIndex := b.GetNextFloat64(refColIndex, 0) - if startIndex == -1 { - start = -1 - } - end, endIndex := b.GetPrevFloat64(refColIndex, b.NumRows()-1) - if endIndex == -1 { - end = -1 - } - w := rolling.Window{ - Bow: b, - IntervalColIndex: refColIndex, - IsInclusive: true, - FirstIndex: firstIndex, - Start: int64(start), - End: int64(end), - } - - val, err := aggr.Func()(aggr.InputIndex(), w) - if err != nil { - return nil, err - } - - for _, transform := range aggr.Transforms() { - val, err = transform(val) - if err != nil { - return nil, err - } - } - - buf.SetOrDrop(0, val) - seriesSlice[writeColIndex] = bow.NewSeriesFromBuffer(name, buf) - } - - return bow.NewBow(seriesSlice...) -} diff --git a/rolling/aggregation/windowstart.go b/rolling/aggregation/windowstart.go index e66f00d..421746b 100644 --- a/rolling/aggregation/windowstart.go +++ b/rolling/aggregation/windowstart.go @@ -8,6 +8,6 @@ import ( func WindowStart(col string) rolling.ColAggregation { return rolling.NewColAggregation(col, false, bow.IteratorDependent, func(col int, w rolling.Window) (interface{}, error) { - return w.Start, nil + return w.FirstValue, nil }) } diff --git a/rolling/aggregation_test.go b/rolling/aggregation_test.go index afbad74..54dd684 100644 --- a/rolling/aggregation_test.go +++ b/rolling/aggregation_test.go @@ -6,18 +6,24 @@ import ( "github.com/metronlab/bow" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestIntervalRolling_Aggregate(t *testing.T) { - b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{ - {10, 15, 16, 25, 29}, - {1.0, 1.5, 1.6, 2.5, 2.9}, - }) - r, _ := IntervalRolling(b, timeCol, 10, Options{}) + b, err := bow.NewBowFromColBasedInterfaces( + []string{timeCol, valueCol}, + []bow.Type{bow.Int64, bow.Float64}, + [][]interface{}{ + {10, 15, 16, 25, 29}, + {1.0, 1.5, 1.6, 2.5, 2.9}, + }) + require.NoError(t, err) + r, err := IntervalRolling(b, timeCol, 10, Options{}) + require.NoError(t, err) timeAggr := NewColAggregation(timeCol, false, bow.Int64, func(col int, w Window) (interface{}, error) { - return w.Start, nil + return w.FirstValue, nil }) valueAggr := NewColAggregation(valueCol, false, bow.Float64, func(col int, w Window) (interface{}, error) { @@ -32,7 +38,7 @@ func TestIntervalRolling_Aggregate(t *testing.T) { aggregated, err := r. Aggregate(timeAggr, valueAggr). Bow() - assert.Nil(t, err) + assert.NoError(t, err) assert.NotNil(t, aggregated) expected, _ := bow.NewBowFromColBasedInterfaces( []string{timeCol, valueCol}, @@ -48,7 +54,7 @@ func TestIntervalRolling_Aggregate(t *testing.T) { aggregated, err := r. Aggregate(valueAggr, timeAggr). Bow() - assert.Nil(t, err) + assert.NoError(t, err) assert.NotNil(t, aggregated) expected, _ := bow.NewBowFromColBasedInterfaces( []string{valueCol, timeCol}, @@ -61,8 +67,8 @@ func TestIntervalRolling_Aggregate(t *testing.T) { }) t.Run("rename columns", func(t *testing.T) { - aggregated, err := r.Aggregate(timeAggr.Rename("a"), valueAggr.Rename("b")).Bow() - assert.Nil(t, err) + aggregated, err := r.Aggregate(timeAggr.RenameOutput("a"), valueAggr.RenameOutput("b")).Bow() + assert.NoError(t, err) assert.NotNil(t, aggregated) expected, _ := bow.NewBowFromColBasedInterfaces( []string{"a", "b"}, @@ -76,7 +82,7 @@ func TestIntervalRolling_Aggregate(t *testing.T) { t.Run("less than in original", func(t *testing.T) { aggregated, err := r.Aggregate(timeAggr).Bow() - assert.Nil(t, err) + assert.NoError(t, err) assert.NotNil(t, aggregated) expected, _ := bow.NewBowFromColBasedInterfaces( []string{timeCol}, @@ -88,8 +94,8 @@ func TestIntervalRolling_Aggregate(t *testing.T) { }) t.Run("more than in original", func(t *testing.T) { - aggregated, err := r.Aggregate(timeAggr, doubleAggr.Rename("double"), valueAggr).Bow() - assert.Nil(t, err) + aggregated, err := r.Aggregate(timeAggr, doubleAggr.RenameOutput("double"), valueAggr).Bow() + assert.NoError(t, err) assert.NotNil(t, aggregated) expected, _ := bow.NewBowFromColBasedInterfaces( []string{timeCol, "double", valueCol}, @@ -104,23 +110,29 @@ func TestIntervalRolling_Aggregate(t *testing.T) { t.Run("missing interval colIndex", func(t *testing.T) { _, err := r.Aggregate(valueAggr).Bow() - assert.EqualError(t, err, fmt.Sprintf("rolling.Aggregate error: must keep interval column '%s'", timeCol)) + assert.EqualError(t, err, fmt.Sprintf( + "intervalRolling.indexedAggregations: must keep interval column '%s'", timeCol)) }) t.Run("invalid colIndex", func(t *testing.T) { _, err := r.Aggregate(timeAggr, NewColAggregation("-", false, bow.Int64, func(col int, w Window) (interface{}, error) { return nil, nil })).Bow() - assert.EqualError(t, err, "rolling.Aggregate error: no column '-'") + assert.EqualError(t, err, + "intervalRolling.indexedAggregations: no column '-'") }) } func TestWindow_UnsetInclusive(t *testing.T) { - inclusiveBow, err := bow.NewBowFromColBasedInterfaces([]string{"time", "value"}, []bow.Type{bow.Int64, bow.Int64}, + inclusiveBow, err := bow.NewBowFromColBasedInterfaces( + []string{timeCol, valueCol}, + []bow.Type{bow.Int64, bow.Int64}, [][]interface{}{ {1, 2}, {1, 2}}) assert.NoError(t, err) - exclusiveBow, err := bow.NewBowFromColBasedInterfaces([]string{"time", "value"}, []bow.Type{bow.Int64, bow.Int64}, + exclusiveBow, err := bow.NewBowFromColBasedInterfaces( + []string{timeCol, valueCol}, + []bow.Type{bow.Int64, bow.Int64}, [][]interface{}{ {1}, {1}}) @@ -130,8 +142,8 @@ func TestWindow_UnsetInclusive(t *testing.T) { Bow: inclusiveBow, FirstIndex: 0, IntervalColIndex: 0, - Start: 0, - End: 2, + FirstValue: 0, + LastValue: 2, IsInclusive: true, } @@ -142,8 +154,8 @@ func TestWindow_UnsetInclusive(t *testing.T) { Bow: nil, FirstIndex: 0, IntervalColIndex: 0, - Start: 0, - End: 2, + FirstValue: 0, + LastValue: 2, IsInclusive: false, }, exclusiveWindow) @@ -152,8 +164,8 @@ func TestWindow_UnsetInclusive(t *testing.T) { Bow: inclusiveBow, FirstIndex: 0, IntervalColIndex: 0, - Start: 0, - End: 2, + FirstValue: 0, + LastValue: 2, IsInclusive: true, }, inclusiveWindow) } diff --git a/rolling/interpolation.go b/rolling/interpolation.go index e910eba..6ac0534 100644 --- a/rolling/interpolation.go +++ b/rolling/interpolation.go @@ -6,17 +6,7 @@ import ( "github.com/metronlab/bow" ) -// ColInterpolationFunc provides a value at the start of `window`. -type ColInterpolationFunc func(inputCol int, window Window, fullBow, prevRow bow.Bow) (interface{}, error) - -func NewColInterpolation(colName string, inputTypes []bow.Type, fn ColInterpolationFunc) ColInterpolation { - return ColInterpolation{ - colName: colName, - inputTypes: inputTypes, - fn: fn, - } -} - +// ColInterpolation is used to interpolate a column. type ColInterpolation struct { colName string inputTypes []bow.Type @@ -25,44 +15,33 @@ type ColInterpolation struct { colIndex int } -// Interpolate fills each window by interpolating its start if missing -func (it *intervalRollingIter) Interpolate(interps ...ColInterpolation) Rolling { - if it.err != nil { - return it - } - - itCopy := *it - newIntervalCol, interps, err := itCopy.indexedInterpolations(interps) - if err != nil { - return itCopy.setError(fmt.Errorf("interpolate: %w", err)) - } +// ColInterpolationFunc is a function that take a column index, a Window, the full bow.Bow and the previous row, and provides a value at the start of the Window. +type ColInterpolationFunc func(colIndex int, window Window, fullBow, prevRow bow.Bow) (interface{}, error) - b, err := itCopy.interpolateWindows(interps) - if err != nil { - return itCopy.setError(fmt.Errorf("interpolate: %w", err)) - } - if b == nil { - b = it.bow.NewEmptySlice() +// NewColInterpolation returns a new ColInterpolation. +func NewColInterpolation(colName string, inputTypes []bow.Type, fn ColInterpolationFunc) ColInterpolation { + return ColInterpolation{ + colName: colName, + inputTypes: inputTypes, + fn: fn, } +} - newIt, err := IntervalRollingForIndex(b, newIntervalCol, itCopy.interval, itCopy.options) - if err != nil { - return itCopy.setError(fmt.Errorf("interpolate: %w", err)) +func (r *intervalRolling) Interpolate(interps ...ColInterpolation) Rolling { + if r.err != nil { + return r } - return newIt -} - -func (it *intervalRollingIter) indexedInterpolations(interps []ColInterpolation) (int, []ColInterpolation, error) { + rCopy := *r if len(interps) == 0 { - return -1, nil, fmt.Errorf("at least one column interpolation is required") + return rCopy.setError(fmt.Errorf("at least one column interpolation is required")) } newIntervalCol := -1 for i := range interps { - isInterval, err := it.validateInterpolation(&interps[i], i) + isInterval, err := r.validateInterpolation(&interps[i], i) if err != nil { - return -1, nil, err + return rCopy.setError(fmt.Errorf("intervalRolling.validateInterpolation: %w", err)) } if isInterval { newIntervalCol = i @@ -70,50 +49,64 @@ func (it *intervalRollingIter) indexedInterpolations(interps []ColInterpolation) } if newIntervalCol == -1 { - return -1, nil, fmt.Errorf("must keep interval column '%s'", it.bow.ColumnName(it.colIndex)) + return rCopy.setError(fmt.Errorf("must keep interval column '%s'", r.bow.ColumnName(r.colIndex))) } - return newIntervalCol, interps, nil + b, err := rCopy.interpolateWindows(interps) + if err != nil { + return rCopy.setError(fmt.Errorf("intervalRolling.interpolateWindows: %w", err)) + } + if b == nil { + b = r.bow.NewEmptySlice() + } + + newR, err := newIntervalRolling(b, newIntervalCol, rCopy.interval, rCopy.options) + if err != nil { + return rCopy.setError(fmt.Errorf("newIntervalRolling: %w", err)) + } + + return newR } -func (it *intervalRollingIter) validateInterpolation(interp *ColInterpolation, newIndex int) (bool, error) { +func (r *intervalRolling) validateInterpolation(interp *ColInterpolation, newIndex int) (bool, error) { if interp.colName == "" { return false, fmt.Errorf("interpolation %d has no column name", newIndex) } - readIndex, err := it.bow.ColumnIndex(interp.colName) + + var err error + interp.colIndex, err = r.bow.ColumnIndex(interp.colName) if err != nil { return false, err } - interp.colIndex = readIndex var typeOk bool - typ := it.bow.ColumnType(interp.colIndex) - for _, inputTyp := range interp.inputTypes { - if typ == inputTyp { + colType := r.bow.ColumnType(interp.colIndex) + for _, inputType := range interp.inputTypes { + if colType == inputType { typeOk = true break } } if !typeOk { return false, fmt.Errorf("accepts types %v, got type %s", - interp.inputTypes, typ.String()) + interp.inputTypes, colType) } - return interp.colIndex == it.colIndex, nil + return interp.colIndex == r.colIndex, nil } -func (it *intervalRollingIter) interpolateWindows(interps []ColInterpolation) (bow.Bow, error) { - it2 := *it +func (r *intervalRolling) interpolateWindows(interps []ColInterpolation) (bow.Bow, error) { + rCopy := *r - bows := make([]bow.Bow, it2.numWindows) + bows := make([]bow.Bow, rCopy.numWindows) - for it2.HasNext() { - winIndex, w, err := it2.Next() + for rCopy.HasNext() { + winIndex, w, err := rCopy.Next() if err != nil { return nil, err } - bows[winIndex], err = it2.interpolateWindow(interps, w) + bows[winIndex], err = rCopy.interpolateWindow(interps, w) if err != nil { return nil, err } @@ -122,33 +115,33 @@ func (it *intervalRollingIter) interpolateWindows(interps []ColInterpolation) (b return bow.AppendBows(bows...) } -func (it *intervalRollingIter) interpolateWindow(interps []ColInterpolation, w *Window) (bow.Bow, error) { +func (r *intervalRolling) interpolateWindow(interps []ColInterpolation, window *Window) (bow.Bow, error) { var firstColValue int64 = -1 - if w.Bow.NumRows() > 0 { - firstColVal, i := w.Bow.GetNextFloat64(it.colIndex, 0) + if window.Bow.NumRows() > 0 { + firstColVal, i := window.Bow.GetNextFloat64(r.colIndex, 0) if i > -1 { firstColValue = int64(firstColVal) } } // has start: call interpolation anyway for those stateful - if firstColValue == w.Start { + if firstColValue == window.FirstValue { for _, interpolation := range interps { - _, err := interpolation.fn(interpolation.colIndex, *w, it.bow, it.options.PrevRow) + _, err := interpolation.fn(interpolation.colIndex, *window, r.bow, r.options.PrevRow) if err != nil { return nil, err } } - return w.Bow, nil + return window.Bow, nil } // missing start - seriesSlice := make([]bow.Series, len(interps)) + series := make([]bow.Series, len(interps)) for colIndex, interpolation := range interps { - colType := w.Bow.ColumnType(interpolation.colIndex) + colType := window.Bow.ColumnType(interpolation.colIndex) - interpolatedValue, err := interpolation.fn(interpolation.colIndex, *w, it.bow, it.options.PrevRow) + interpolatedValue, err := interpolation.fn(interpolation.colIndex, *window, r.bow, r.options.PrevRow) if err != nil { return nil, err } @@ -156,13 +149,13 @@ func (it *intervalRollingIter) interpolateWindow(interps []ColInterpolation, w * buf := bow.NewBuffer(1, colType) buf.SetOrDrop(0, interpolatedValue) - seriesSlice[colIndex] = bow.NewSeriesFromBuffer(w.Bow.ColumnName(interpolation.colIndex), buf) + series[colIndex] = bow.NewSeriesFromBuffer(window.Bow.ColumnName(interpolation.colIndex), buf) } - startBow, err := bow.NewBow(seriesSlice...) + startBow, err := bow.NewBow(series...) if err != nil { return nil, err } - return bow.AppendBows(startBow, w.Bow) + return bow.AppendBows(startBow, window.Bow) } diff --git a/rolling/interpolation/linear.go b/rolling/interpolation/linear.go index 3d00d59..baaf327 100644 --- a/rolling/interpolation/linear.go +++ b/rolling/interpolation/linear.go @@ -31,7 +31,7 @@ func Linear(colName string) rolling.ColInterpolation { return nil, nil } - coef := (float64(w.Start) - t0) / (t2 - t0) + coef := (float64(w.FirstValue) - t0) / (t2 - t0) return ((v2 - v0) * coef) + v0, nil }, ) diff --git a/rolling/interpolation/linear_test.go b/rolling/interpolation/linear_test.go index 59f801d..3d0e11b 100644 --- a/rolling/interpolation/linear_test.go +++ b/rolling/interpolation/linear_test.go @@ -97,7 +97,7 @@ func TestLinear(t *testing.T) { _, err = r. Interpolate(WindowStart(timeCol), Linear(valueCol)). Bow() - assert.EqualError(t, err, "interpolate: accepts types [int64 float64], got type utf8") + assert.EqualError(t, err, "intervalRolling.validateInterpolation: accepts types [int64 float64], got type utf8") }) t.Run("bool error", func(t *testing.T) { @@ -110,7 +110,7 @@ func TestLinear(t *testing.T) { res, err := r. Interpolate(WindowStart(timeCol), Linear(valueCol)). Bow() - assert.EqualError(t, err, "interpolate: accepts types [int64 float64], got type bool", + assert.EqualError(t, err, "intervalRolling.validateInterpolation: accepts types [int64 float64], got type bool", "have res: %v", res) }) } diff --git a/rolling/interpolation/windowstart.go b/rolling/interpolation/windowstart.go index 789d6c4..0b1f52c 100644 --- a/rolling/interpolation/windowstart.go +++ b/rolling/interpolation/windowstart.go @@ -8,7 +8,7 @@ import ( func WindowStart(colName string) rolling.ColInterpolation { return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64}, func(colIndexToFill int, w rolling.Window, fullBow, prevRow bow.Bow) (interface{}, error) { - return w.Start, nil + return w.FirstValue, nil }, ) } diff --git a/rolling/interpolation_test.go b/rolling/interpolation_test.go index 3540414..1d2591f 100644 --- a/rolling/interpolation_test.go +++ b/rolling/interpolation_test.go @@ -11,7 +11,7 @@ import ( func TestIntervalRollingIter_Interpolate(t *testing.T) { timeInterp := NewColInterpolation(timeCol, []bow.Type{bow.Int64}, func(colIndex int, w Window, full, prevRow bow.Bow) (interface{}, error) { - return w.Start, nil + return w.FirstValue, nil }) valueInterp := NewColInterpolation(valueCol, []bow.Type{bow.Int64, bow.Float64}, func(colIndex int, w Window, full, prevRow bow.Bow) (interface{}, error) { @@ -31,7 +31,7 @@ func TestIntervalRollingIter_Interpolate(t *testing.T) { _, err := r. Interpolate(timeInterp, interp). Bow() - assert.EqualError(t, err, "interpolate: accepts types [int64 bool], got type float64") + assert.EqualError(t, err, "intervalRolling.validateInterpolation: accepts types [int64 bool], got type float64") }) t.Run("missing interval column", func(t *testing.T) { @@ -43,7 +43,7 @@ func TestIntervalRollingIter_Interpolate(t *testing.T) { _, err := r. Interpolate(valueInterp). Bow() - assert.EqualError(t, err, fmt.Sprintf("interpolate: must keep interval column '%s'", timeCol)) + assert.EqualError(t, err, fmt.Sprintf("must keep interval column '%s'", timeCol)) }) t.Run("empty bow", func(t *testing.T) { diff --git a/rolling/rolling.go b/rolling/rolling.go index a8c2d0e..fe60613 100644 --- a/rolling/rolling.go +++ b/rolling/rolling.go @@ -7,252 +7,242 @@ import ( "github.com/metronlab/bow" ) -// Rolling allows to process a bow via windows. -// Use `Interpolate` and/or `Aggregate` to transform windows. -// Use `HasNext` and `Next` to iterate over windows. -// Use `Bow` to get the processed bow. +// Rolling enables processing a Bow via windows. +// Use Interpolate() and/or Aggregate() to transform windows. +// Use Next() to iterate over windows. +// Use Bow() to get the processed Bow. type Rolling interface { - Interpolate(interpolations ...ColInterpolation) Rolling - Aggregate(aggregations ...ColAggregation) Rolling + // Aggregate aggregates each column by using a ColAggregation. + Aggregate(...ColAggregation) Rolling + // Interpolate fills each window by interpolating its start if missing. + Interpolate(...ColInterpolation) Rolling + // NumWindows returns the total number of windows in the Bow. NumWindows() (int, error) + // HasNext returns true if the next call to Next() will return a new Window. HasNext() bool - Next() (windowIndex int, w *Window, err error) + // Next returns the next Window, along with its index. + Next() (windowIndex int, window *Window, err error) + // Bow returns the Bow from the Rolling. Bow() (bow.Bow, error) } +type intervalRolling struct { + // TODO: sync.Mutex + bow bow.Bow + colIndex int + interval int64 + options Options + numWindows int + + currWindowStart int64 + currRowIndex int + currWindowIndex int + err error +} + +// Options sets options for IntervalRolling: +// - Offset: interval to move the window start, can be negative. +// - Inclusive: sets if the window needs to be inclusive; i.e., includes the last point. +// - PrevRow: extra point before the window to enable better interpolation. type Options struct { - // offsets windows' start, starting earlier if necessary to preserve first points Offset int64 Inclusive bool PrevRow bow.Bow } -func NumWindowsInRange(first, last, interval, offset int64) (int, error) { - if first > last { - return -1, errors.New("first must be <= last") - } - var err error - offset, err = validateIntervalOffset(interval, offset) - if err != nil { - return -1, err - } - - start := (first/interval)*interval + offset - if start > first { - start -= interval - } - - return int((last-start)/interval + 1), nil -} - -// IntervalRolling provides an interval-based `Rolling`. -// Intervals rely on numerical values regardless of a unit. +// IntervalRolling returns a new interval-based Rolling with: +// - b: Bow to process in windows +// - colName: column on which the interval is based on +// - interval: numeric value independent of any unit, length of the windows // All windows except the last one may be empty. -// `colName`: column name used to make intervals -// `interval`: length of an interval func IntervalRolling(b bow.Bow, colName string, interval int64, options Options) (Rolling, error) { colIndex, err := b.ColumnIndex(colName) if err != nil { - return nil, fmt.Errorf("rolling.IntervalRolling: %w", err) + return nil, err } - return IntervalRollingForIndex(b, colIndex, interval, options) + return newIntervalRolling(b, colIndex, interval, options) } -func IntervalRollingForIndex(b bow.Bow, colIndex int, interval int64, options Options) (Rolling, error) { - var err error - options.Offset, err = validateIntervalOffset(interval, options.Offset) - if err != nil { - return nil, err +func newIntervalRolling(b bow.Bow, colIndex int, interval int64, options Options) (Rolling, error) { + if b.ColumnType(colIndex) != bow.Int64 { + return nil, fmt.Errorf("impossible to create a new intervalRolling on column of type %v", + b.ColumnType(colIndex)) } - options.PrevRow, err = validatePrevRow(options.PrevRow) + var err error + options.Offset, err = enforceIntervalAndOffset(interval, options.Offset) if err != nil { - return nil, err + return nil, fmt.Errorf("enforceIntervalAndOffset: %w", err) } - if b.ColumnType(colIndex) != bow.Int64 { - return nil, fmt.Errorf( - "rolling.IntervalRolling: impossible to roll over type %v", - b.ColumnType(colIndex)) + options.PrevRow, err = enforcePrevRow(options.PrevRow) + if err != nil { + return nil, fmt.Errorf("enforcePrevRow: %w", err) } - var start int64 + var firstWindowStart int64 if b.NumRows() > 0 { - first, valid := b.GetInt64(colIndex, 0) + firstBowValue, valid := b.GetInt64(colIndex, 0) if !valid { - v := b.GetValue(colIndex, 0) return nil, fmt.Errorf( - "rolling.IntervalRolling: expected int64 start value, got %v", v) + "the first value of the column should be convertible to int64, got %v", + b.GetValue(colIndex, 0)) } + // align first window start on interval - start = (first/interval)*interval + options.Offset - if start > first { - start -= interval + firstWindowStart = (firstBowValue/interval)*interval + options.Offset + if firstWindowStart > firstBowValue { + firstWindowStart -= interval } } - numWins, err := numWindows(b, colIndex, start, interval) - if err != nil { - return nil, err - } + numWindows := countWindows(b, colIndex, firstWindowStart, interval) - return &intervalRollingIter{ - bow: b, - colIndex: colIndex, - interval: interval, - options: options, - numWindows: numWins, - currStart: start, + return &intervalRolling{ + bow: b, + colIndex: colIndex, + interval: interval, + options: options, + numWindows: numWindows, + currWindowStart: firstWindowStart, }, nil } -func validatePrevRow(prevRow bow.Bow) (bow.Bow, error) { - if prevRow != nil { - if prevRow.NumRows() == 0 { - prevRow = nil - } else if prevRow.NumRows() != 1 { - return nil, fmt.Errorf( - "validatePrevRow: prevRow must have only one row, have %d", - prevRow.NumRows()) - } - } - - return prevRow, nil -} - -func validateIntervalOffset(interval, offset int64) (int64, error) { +func enforceIntervalAndOffset(interval, offset int64) (int64, error) { if interval <= 0 { - return -1, errors.New("rolling.IntervalRolling: strictly positive interval required") + return -1, errors.New("strictly positive interval required") } + if offset >= interval || offset <= -interval { offset = offset % interval } + if offset < 0 { offset += interval } + return offset, nil } -func numWindows(b bow.Bow, colIndex int, start, interval int64) (int, error) { - if b.NumRows() == 0 { - return 0, nil +func enforcePrevRow(prevRow bow.Bow) (bow.Bow, error) { + if prevRow == nil || prevRow.NumRows() == 0 { + return nil, nil } - last, rowIndex := b.GetPrevInt64(colIndex, b.NumRows()-1) - - if rowIndex == -1 || start > last { - return 0, nil + if prevRow.NumRows() != 1 { + return nil, fmt.Errorf("prevRow must have only one row, have %d", + prevRow.NumRows()) } - return int((last-start)/interval + 1), nil + return prevRow, nil } -type intervalRollingIter struct { - // todo: sync.Mutex +func countWindows(b bow.Bow, colIndex int, firstWindowStart, interval int64) int { + if b.NumRows() == 0 { + return 0 + } - bow bow.Bow - colIndex int - interval int64 - options Options - numWindows int + lastBowValue, lastBowValueRowIndex := b.GetPrevInt64(colIndex, b.NumRows()-1) + if lastBowValueRowIndex == -1 || firstWindowStart > lastBowValue { + return 0 + } - currStart int64 // e.g. start time - currIndex int - windowIndex int - err error + return int((lastBowValue-firstWindowStart)/interval + 1) } -func (it *intervalRollingIter) Bow() (bow.Bow, error) { - return it.bow, it.err +func (r *intervalRolling) NumWindows() (int, error) { + return r.numWindows, r.err } -// HasNext checks if `Next` will provide a window. -// -// todo: concurrent-safe -func (it *intervalRollingIter) HasNext() bool { - if it.currIndex >= it.bow.NumRows() { +// TODO: concurrent-safe + +func (r *intervalRolling) HasNext() bool { + if r.currRowIndex >= r.bow.NumRows() { return false } - n, valid := it.bow.GetInt64(it.colIndex, it.bow.NumRows()-1) - return valid && it.currStart <= n + + lastBowValue, lastBowValueIsValid := r.bow.GetInt64(r.colIndex, r.bow.NumRows()-1) + if !lastBowValueIsValid { + return false + } + + return r.currWindowStart <= lastBowValue } -// Next window if any. -// This mutates the iterator. -// -// todo: concurrent-safe -func (it *intervalRollingIter) Next() (windowIndex int, w *Window, err error) { - if !it.HasNext() { - return it.windowIndex, nil, nil +// TODO: concurrent-safe + +func (r *intervalRolling) Next() (windowIndex int, window *Window, err error) { + if !r.HasNext() { + return r.currWindowIndex, nil, nil } - start := it.currStart - end := it.currStart + it.interval // include last position even if last point is excluded + windowStart := r.currWindowStart + windowEnd := r.currWindowStart + r.interval // include last position even if last point is excluded - firstIndex, lastIndex := it.currIndex, -1 - var i int - var isInclusive bool - for i = firstIndex; i < it.bow.NumRows(); i++ { - ref, ok := it.bow.GetInt64(it.colIndex, i) + rowIndex := 0 + isInclusive := false + firstRowIndex := r.currRowIndex + lastRowIndex := -1 + for rowIndex = firstRowIndex; rowIndex < r.bow.NumRows(); rowIndex++ { + ref, ok := r.bow.GetInt64(r.colIndex, rowIndex) if !ok { continue } - if ref < start { + if ref < windowStart { continue } - if ref > end { + if ref > windowEnd { break } - if ref == end { + if ref == windowEnd { if isInclusive { break } - if !it.options.Inclusive { + if !r.options.Inclusive { break } isInclusive = true } - lastIndex = i + lastRowIndex = rowIndex } if !isInclusive { - it.currIndex = i + r.currRowIndex = rowIndex } else { - it.currIndex = i - 1 + r.currRowIndex = rowIndex - 1 } - it.currStart = end - windowIndex = it.windowIndex - it.windowIndex++ + r.currWindowStart = windowEnd + windowIndex = r.currWindowIndex + r.currWindowIndex++ var b bow.Bow - if lastIndex == -1 { - b = it.bow.NewEmptySlice() + if lastRowIndex == -1 { + b = r.bow.NewEmptySlice() } else { - b = it.bow.NewSlice(firstIndex, lastIndex+1) + b = r.bow.NewSlice(firstRowIndex, lastRowIndex+1) } return windowIndex, &Window{ - FirstIndex: firstIndex, Bow: b, - IntervalColIndex: it.colIndex, - Start: start, - End: end, + FirstIndex: firstRowIndex, + IntervalColIndex: r.colIndex, + FirstValue: windowStart, + LastValue: windowEnd, IsInclusive: isInclusive, }, nil } -func (it *intervalRollingIter) setError(err error) Rolling { - it.err = err - return it +func (r *intervalRolling) Bow() (bow.Bow, error) { + return r.bow, r.err } -// NumWindows gives the total of windows across the entire bow this iterator was built from. -func (it *intervalRollingIter) NumWindows() (int, error) { - return it.numWindows, it.err +func (r *intervalRolling) setError(err error) Rolling { + r.err = err + return r } diff --git a/rolling/rolling_test.go b/rolling/rolling_test.go index 5d29c3a..5a42bc8 100644 --- a/rolling/rolling_test.go +++ b/rolling/rolling_test.go @@ -6,6 +6,7 @@ import ( "github.com/metronlab/bow" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) var ( @@ -16,152 +17,110 @@ var ( emptyCols = [][]interface{}{{}, {}} ) -func TestNumWindowsInRange(t *testing.T) { - t.Run("firstVal == lastVal", func(t *testing.T) { - nbWindows, err := NumWindowsInRange(0, 0, 1, 0) - assert.NoError(t, err) - assert.Equal(t, 1, nbWindows) - }) - - t.Run("firstVal == lastVal with offset", func(t *testing.T) { - nbWindows, err := NumWindowsInRange(0, 0, 1, 1) - assert.NoError(t, err) - assert.Equal(t, 1, nbWindows) - }) - - t.Run("firstVal > lastVal", func(t *testing.T) { - nbWindows, err := NumWindowsInRange(1, 0, 1, 0) - assert.Error(t, err) - assert.Equal(t, -1, nbWindows) - }) - - t.Run("interval < 0", func(t *testing.T) { - nbWindows, err := NumWindowsInRange(0, 0, -1, 0) - assert.Error(t, err) - assert.Equal(t, -1, nbWindows) - }) - - t.Run("without offset one window", func(t *testing.T) { - nbWindows, err := NumWindowsInRange(0, 1, 2, 0) - assert.NoError(t, err) - assert.Equal(t, 1, nbWindows) - }) - - t.Run("without offset two window", func(t *testing.T) { - nbWindows, err := NumWindowsInRange(0, 2, 2, 0) - assert.NoError(t, err) - assert.Equal(t, 2, nbWindows) - }) - - t.Run("with offset one window", func(t *testing.T) { - nbWindows, err := NumWindowsInRange(0, 1, 2, 1) - assert.NoError(t, err) - assert.Equal(t, 2, nbWindows) - }) - - t.Run("with offset two window", func(t *testing.T) { - nbWindows, err := NumWindowsInRange(0, 2, 2, 1) - assert.NoError(t, err) - assert.Equal(t, 2, nbWindows) - }) -} - func TestIntervalRolling_NumWindows(t *testing.T) { t.Run("empty bow", func(t *testing.T) { - r, _ := IntervalRolling(newIntervalRollingTestBow(emptyCols), timeCol, 1, Options{}) + r, err := IntervalRolling(newIntervalRollingTestBow(t, emptyCols), timeCol, 1, Options{}) + require.NoError(t, err) n, err := r.NumWindows() - assert.Nil(t, err) + assert.NoError(t, err) assert.Equal(t, 0, n) }) t.Run("one liner bow", func(t *testing.T) { - r, _ := IntervalRolling(newIntervalRollingTestBow([][]interface{}{ + r, err := IntervalRolling(newIntervalRollingTestBow(t, [][]interface{}{ {0}, {1.}, }), timeCol, 1, Options{}) + require.NoError(t, err) n, err := r.NumWindows() assert.Nil(t, err) assert.Equal(t, 1, n) }) t.Run("points in same window", func(t *testing.T) { - r, _ := IntervalRolling(newIntervalRollingTestBow([][]interface{}{ + r, err := IntervalRolling(newIntervalRollingTestBow(t, [][]interface{}{ {0, 9}, {1., 1.}, }), timeCol, 10, Options{}) + require.NoError(t, err) n, err := r.NumWindows() - assert.Nil(t, err) + assert.NoError(t, err) assert.Equal(t, 1, n) }) t.Run("excluded point goes in next window", func(t *testing.T) { - r, _ := IntervalRolling(newIntervalRollingTestBow([][]interface{}{ + r, err := IntervalRolling(newIntervalRollingTestBow(t, [][]interface{}{ {0, 10}, {1., 1.}, }), timeCol, 10, Options{}) + require.NoError(t, err) n, err := r.NumWindows() - assert.Nil(t, err) + assert.NoError(t, err) assert.Equal(t, 2, n) }) t.Run("offset puts first value in preceding window", func(t *testing.T) { - r, _ := IntervalRolling(newIntervalRollingTestBow([][]interface{}{ + r, err := IntervalRolling(newIntervalRollingTestBow(t, [][]interface{}{ {0, 9}, {1., 1.}, }), timeCol, 10, Options{Offset: 1}) + require.NoError(t, err) n, err := r.NumWindows() - assert.Nil(t, err) + assert.NoError(t, err) assert.Equal(t, 2, n) }) } func TestIntervalRolling_iterator_init(t *testing.T) { t.Run("interval < 0", func(t *testing.T) { - b := newIntervalRollingTestBow([][]interface{}{{0}, {1.}}) + b := newIntervalRollingTestBow(t, [][]interface{}{{0}, {1.}}) rolling, err := IntervalRolling(b, timeCol, 0, Options{}) - assert.EqualError(t, err, "rolling.IntervalRolling: strictly positive interval required") + assert.EqualError(t, err, "enforceIntervalAndOffset: strictly positive interval required") assert.Nil(t, rolling) }) t.Run("interval == 0", func(t *testing.T) { - b := newIntervalRollingTestBow([][]interface{}{{0}, {1.}}) + b := newIntervalRollingTestBow(t, [][]interface{}{{0}, {1.}}) rolling, err := IntervalRolling(b, timeCol, 0, Options{}) - assert.EqualError(t, err, "rolling.IntervalRolling: strictly positive interval required") + assert.EqualError(t, err, "enforceIntervalAndOffset: strictly positive interval required") assert.Nil(t, rolling) }) t.Run("non existing index", func(t *testing.T) { - b := newIntervalRollingTestBow([][]interface{}{{0}, {1.}}) + b := newIntervalRollingTestBow(t, [][]interface{}{{0}, {1.}}) _, err := IntervalRolling(b, badCol, 1, Options{}) - assert.EqualError(t, err, fmt.Sprintf("rolling.IntervalRolling: no column '%s'", badCol)) + assert.EqualError(t, err, fmt.Sprintf("no column '%s'", badCol)) }) t.Run("invalid interval type", func(t *testing.T) { - b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol}, []bow.Type{bow.Float64}, [][]interface{}{{0.}}) + b, _ := bow.NewBowFromColBasedInterfaces( + []string{timeCol}, + []bow.Type{bow.Float64}, + [][]interface{}{{0.}}) _, err := IntervalRolling(b, timeCol, 1, Options{}) - assert.EqualError(t, err, "rolling.IntervalRolling: impossible to roll over type float64") + assert.EqualError(t, err, "impossible to create a new intervalRolling on column of type float64") }) t.Run("empty bow gives valid finished iterator", func(t *testing.T) { - b := newIntervalRollingTestBow(emptyCols) - rolling, err := IntervalRolling(b, timeCol, 1, Options{}) - assert.Nil(t, err) - iter := rolling.(*intervalRollingIter) - assert.Nil(t, err) - _, w, err := iter.Next() + b := newIntervalRollingTestBow(t, emptyCols) + r, err := IntervalRolling(b, timeCol, 1, Options{}) + assert.NoError(t, err) + rCopy := r.(*intervalRolling) + _, w, err := rCopy.Next() assert.Nil(t, w) - assert.Nil(t, err) + assert.NoError(t, err) }) } func TestIntervalRolling_iterate(t *testing.T) { var interval int64 = 5 - b := newIntervalRollingTestBow([][]interface{}{ - {12, 15, 16, 25, 25, 29}, // 25 is a duplicated index on ref column - {1.2, 1.5, 1.6, 2.5, 3.5, 2.9}, - }) + b := newIntervalRollingTestBow(t, + [][]interface{}{ + {12, 15, 16, 25, 25, 29}, // 25 is a duplicated index on ref column + {1.2, 1.5, 1.6, 2.5, 3.5, 2.9}, + }) t.Run("no option", func(t *testing.T) { - rolling, err := IntervalRolling(b, timeCol, interval, Options{}) - assert.Nil(t, err) - assert.NotNil(t, rolling) - iter := rolling.(*intervalRollingIter) + r, err := IntervalRolling(b, timeCol, interval, Options{}) + assert.NoError(t, err) + assert.NotNil(t, r) + rCopy := r.(*intervalRolling) expected := []testWindow{ {0, 10, 15, 0, [][]interface{}{{12}, {1.2}}}, @@ -170,20 +129,20 @@ func TestIntervalRolling_iterate(t *testing.T) { {3, 25, 30, 3, [][]interface{}{{25, 25, 29}, {2.5, 3.5, 2.9}}}, } - for i := 0; iter.HasNext(); i++ { - checkTestWindow(t, iter, expected[i]) + for i := 0; rCopy.HasNext(); i++ { + checkTestWindow(t, rCopy, expected[i]) } - _, w, err := iter.Next() + _, w, err := rCopy.Next() assert.Nil(t, w) - assert.Nil(t, err) + assert.NoError(t, err) }) t.Run("with inclusive windows", func(t *testing.T) { - rolling, err := IntervalRolling(b, timeCol, interval, Options{Inclusive: true}) - assert.Nil(t, err) - assert.NotNil(t, rolling) - iter := rolling.(*intervalRollingIter) + r, err := IntervalRolling(b, timeCol, interval, Options{Inclusive: true}) + assert.NoError(t, err) + assert.NotNil(t, r) + rCopy := r.(*intervalRolling) expected := []testWindow{ {0, 10, 15, 0, [][]interface{}{{12, 15}, {1.2, 1.5}}}, @@ -192,20 +151,20 @@ func TestIntervalRolling_iterate(t *testing.T) { {3, 25, 30, 3, [][]interface{}{{25, 25, 29}, {2.5, 3.5, 2.9}}}, } - for i := 0; iter.HasNext(); i++ { - checkTestWindow(t, iter, expected[i]) + for i := 0; rCopy.HasNext(); i++ { + checkTestWindow(t, rCopy, expected[i]) } - _, w, err := iter.Next() + _, w, err := rCopy.Next() assert.Nil(t, w) - assert.Nil(t, err) + assert.NoError(t, err) }) t.Run("with offset falling before first point", func(t *testing.T) { - rolling, err := IntervalRolling(b, timeCol, interval, Options{Offset: 1}) + r, err := IntervalRolling(b, timeCol, interval, Options{Offset: 1}) assert.Nil(t, err) - assert.NotNil(t, rolling) - iter := rolling.(*intervalRollingIter) + assert.NotNil(t, r) + rCopy := r.(*intervalRolling) expected := []testWindow{ {0, 11, 16, 0, [][]interface{}{{12, 15}, {1.2, 1.5}}}, @@ -214,20 +173,20 @@ func TestIntervalRolling_iterate(t *testing.T) { {3, 26, 31, 5, [][]interface{}{{29}, {2.9}}}, } - for i := 0; iter.HasNext(); i++ { - checkTestWindow(t, iter, expected[i]) + for i := 0; rCopy.HasNext(); i++ { + checkTestWindow(t, rCopy, expected[i]) } - _, w, err := iter.Next() + _, w, err := rCopy.Next() assert.Nil(t, w) - assert.Nil(t, err) + assert.NoError(t, err) }) t.Run("with offset falling at first point", func(t *testing.T) { - rolling, err := IntervalRolling(b, timeCol, interval, Options{Offset: 2}) - assert.Nil(t, err) - assert.NotNil(t, rolling) - iter := rolling.(*intervalRollingIter) + r, err := IntervalRolling(b, timeCol, interval, Options{Offset: 2}) + assert.NoError(t, err) + assert.NotNil(t, r) + rCopy := r.(*intervalRolling) expected := []testWindow{ {0, 12, 17, 0, [][]interface{}{{12, 15, 16}, {1.2, 1.5, 1.6}}}, @@ -236,20 +195,20 @@ func TestIntervalRolling_iterate(t *testing.T) { {3, 27, 32, 5, [][]interface{}{{29}, {2.9}}}, } - for i := 0; iter.HasNext(); i++ { - checkTestWindow(t, iter, expected[i]) + for i := 0; rCopy.HasNext(); i++ { + checkTestWindow(t, rCopy, expected[i]) } - _, w, err := iter.Next() + _, w, err := rCopy.Next() assert.Nil(t, w) - assert.Nil(t, err) + assert.NoError(t, err) }) t.Run("with offset falling after first point", func(t *testing.T) { - rolling, err := IntervalRolling(b, timeCol, interval, Options{Offset: 3}) - assert.Nil(t, err) - assert.NotNil(t, rolling) - iter := rolling.(*intervalRollingIter) + r, err := IntervalRolling(b, timeCol, interval, Options{Offset: 3}) + assert.NoError(t, err) + assert.NotNil(t, r) + rCopy := r.(*intervalRolling) expected := []testWindow{ {0, 8, 13, 0, [][]interface{}{{12}, {1.2}}}, @@ -259,20 +218,20 @@ func TestIntervalRolling_iterate(t *testing.T) { {4, 28, 33, 5, [][]interface{}{{29}, {2.9}}}, } - for i := 0; iter.HasNext(); i++ { - checkTestWindow(t, iter, expected[i]) + for i := 0; rCopy.HasNext(); i++ { + checkTestWindow(t, rCopy, expected[i]) } - _, w, err := iter.Next() + _, w, err := rCopy.Next() assert.Nil(t, w) - assert.Nil(t, err) + assert.NoError(t, err) }) t.Run("offset > interval", func(t *testing.T) { - rolling, err := IntervalRolling(b, timeCol, interval, Options{Offset: 8}) - assert.Nil(t, err) - assert.NotNil(t, rolling) - iter := rolling.(*intervalRollingIter) + r, err := IntervalRolling(b, timeCol, interval, Options{Offset: 8}) + assert.NoError(t, err) + assert.NotNil(t, r) + rCopy := r.(*intervalRolling) expected := []testWindow{ {0, 8, 13, 0, [][]interface{}{{12}, {1.2}}}, @@ -282,20 +241,20 @@ func TestIntervalRolling_iterate(t *testing.T) { {4, 28, 33, 5, [][]interface{}{{29}, {2.9}}}, } - for i := 0; iter.HasNext(); i++ { - checkTestWindow(t, iter, expected[i]) + for i := 0; rCopy.HasNext(); i++ { + checkTestWindow(t, rCopy, expected[i]) } - _, w, err := iter.Next() + _, w, err := rCopy.Next() assert.Nil(t, w) - assert.Nil(t, err) + assert.NoError(t, err) }) t.Run("offset == interval", func(t *testing.T) { - rolling, err := IntervalRolling(b, timeCol, interval, Options{Offset: 5}) - assert.Nil(t, err) - assert.NotNil(t, rolling) - iter := rolling.(*intervalRollingIter) + r, err := IntervalRolling(b, timeCol, interval, Options{Offset: 5}) + assert.NoError(t, err) + assert.NotNil(t, r) + rCopy := r.(*intervalRolling) expected := []testWindow{ {0, 10, 15, 0, [][]interface{}{{12}, {1.2}}}, @@ -304,20 +263,20 @@ func TestIntervalRolling_iterate(t *testing.T) { {3, 25, 30, 3, [][]interface{}{{25, 25, 29}, {2.5, 3.5, 2.9}}}, } - for i := 0; iter.HasNext(); i++ { - checkTestWindow(t, iter, expected[i]) + for i := 0; rCopy.HasNext(); i++ { + checkTestWindow(t, rCopy, expected[i]) } - _, w, err := iter.Next() + _, w, err := rCopy.Next() assert.Nil(t, w) - assert.Nil(t, err) + assert.NoError(t, err) }) t.Run("offset < 0", func(t *testing.T) { - rolling, err := IntervalRolling(b, timeCol, interval, Options{Offset: -2}) - assert.Nil(t, err) - assert.NotNil(t, rolling) - iter := rolling.(*intervalRollingIter) + r, err := IntervalRolling(b, timeCol, interval, Options{Offset: -2}) + assert.NoError(t, err) + assert.NotNil(t, r) + rCopy := r.(*intervalRolling) expected := []testWindow{ {0, 8, 13, 0, [][]interface{}{{12}, {1.2}}}, @@ -327,13 +286,13 @@ func TestIntervalRolling_iterate(t *testing.T) { {4, 28, 33, 5, [][]interface{}{{29}, {2.9}}}, } - for i := 0; iter.HasNext(); i++ { - checkTestWindow(t, iter, expected[i]) + for i := 0; rCopy.HasNext(); i++ { + checkTestWindow(t, rCopy, expected[i]) } - _, w, err := iter.Next() + _, w, err := rCopy.Next() assert.Nil(t, w) - assert.Nil(t, err) + assert.NoError(t, err) }) } @@ -345,25 +304,23 @@ type testWindow struct { cols [][]interface{} } -func checkTestWindow(t *testing.T, iter *intervalRollingIter, expected testWindow) { - wi, w, err := iter.Next() +func checkTestWindow(t *testing.T, r *intervalRolling, expected testWindow) { + wi, w, err := r.Next() assert.Equal(t, expected.windowIndex, wi) assert.NotNil(t, w) - assert.Nil(t, err) + assert.NoError(t, err) - assert.Equal(t, expected.start, w.Start) - assert.Equal(t, expected.end, w.End) + assert.Equal(t, expected.start, w.FirstValue) + assert.Equal(t, expected.end, w.LastValue) assert.Equal(t, expected.firstIndex, w.FirstIndex) - b := newIntervalRollingTestBow(expected.cols) + b := newIntervalRollingTestBow(t, expected.cols) assert.True(t, w.Bow.Equal(b), "expect: %v\nhave: %v", b, w.Bow) } -func newIntervalRollingTestBow(cols [][]interface{}) bow.Bow { +func newIntervalRollingTestBow(t *testing.T, cols [][]interface{}) bow.Bow { colNames := []string{timeCol, valueCol} - types := []bow.Type{bow.Int64, bow.Float64} - b, err := bow.NewBowFromColBasedInterfaces(colNames, types, cols) - if err != nil { - panic(err) - } + colTypes := []bow.Type{bow.Int64, bow.Float64} + b, err := bow.NewBowFromColBasedInterfaces(colNames, colTypes, cols) + require.NoError(t, err) return b } diff --git a/transform/factor.go b/rolling/transformation/factor.go similarity index 72% rename from transform/factor.go rename to rolling/transformation/factor.go index c9cc50c..f594eb9 100644 --- a/transform/factor.go +++ b/rolling/transformation/factor.go @@ -1,10 +1,10 @@ -package transform +package transformation import "fmt" -type Transform func(interface{}) (interface{}, error) +type Func func(interface{}) (interface{}, error) -func Factor(n float64) Transform { +func Factor(n float64) Func { return func(x interface{}) (interface{}, error) { switch x := x.(type) { case float64: diff --git a/transform/factor_test.go b/rolling/transformation/factor_test.go similarity index 96% rename from transform/factor_test.go rename to rolling/transformation/factor_test.go index b97ca53..6ab1f9c 100644 --- a/transform/factor_test.go +++ b/rolling/transformation/factor_test.go @@ -1,4 +1,4 @@ -package transform +package transformation import ( "testing" diff --git a/rolling/window.go b/rolling/window.go index 1598d95..7f7479b 100644 --- a/rolling/window.go +++ b/rolling/window.go @@ -2,21 +2,30 @@ package rolling import "github.com/metronlab/bow" +// Window represents an interval-based window of data with: +// Bow: data +// FirstIndex: index (across all windows) of first row in this window (-1 if none) +// IntervalColIndex: index of the interval column +// FirstValue: Window first value +// LastValue: Window last value +// IsInclusive: Window is inclusive, i.e. includes the last point at the end of the interval type Window struct { Bow bow.Bow - FirstIndex int // index (across all windows) of first row in this window (-1 if none) + FirstIndex int IntervalColIndex int - Start int64 - End int64 + FirstValue int64 + LastValue int64 IsInclusive bool } +// UnsetInclusive returns a copy of the Window with the IsInclusive parameter set to false and with the last row sliced off. +// Returns the unchanged Window if the IsInclusive parameter is not set. func (w Window) UnsetInclusive() Window { if !w.IsInclusive { return w } - cp := w - cp.IsInclusive = false - cp.Bow = cp.Bow.NewSlice(0, cp.Bow.NumRows()-1) - return cp + wCopy := w + wCopy.IsInclusive = false + wCopy.Bow = wCopy.Bow.NewSlice(0, wCopy.Bow.NumRows()-1) + return wCopy } From d76dffba0f212c9c4e9d7212aba5812e6844dd56 Mon Sep 17 00:00:00 2001 From: agelloz Date: Mon, 25 Apr 2022 16:51:05 +0200 Subject: [PATCH 04/29] corrections --- Makefile | 3 +- bowbuffer.go | 6 ++-- rolling/aggregation.go | 6 ++-- rolling/interpolation.go | 6 ++-- rolling/rolling.go | 78 ++++++++++++++++++++-------------------- 5 files changed, 49 insertions(+), 50 deletions(-) diff --git a/Makefile b/Makefile index 6739073..8113222 100644 --- a/Makefile +++ b/Makefile @@ -5,13 +5,12 @@ install: @go install golang.org/x/perf/cmd/benchstat@latest @go install github.com/jstemmer/go-junit-report@latest @go install github.com/Metronlab/genius@latest - @go install golang.org/x/tools/cmd/godoc@latest gen: @go generate $(PKG) lint: - golangci-lint run -E gofmt,gci --fix -v $(PKG) + golangci-lint run -E gofmt --fix -v $(PKG) count: @bash -c $(PWD)/scripts/count-code-lines.sh diff --git a/bowbuffer.go b/bowbuffer.go index 95f735c..1135dd6 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -8,8 +8,8 @@ import ( ) // Buffer is a mutable data structure with the purpose of easily building data Series with: -// - Data: slice of data -// - nullBitmapBytes: slice of bytes representing +// - Data: slice of data. +// - nullBitmapBytes: slice of bytes representing valid or null values. type Buffer struct { Data interface{} nullBitmapBytes []byte @@ -47,7 +47,7 @@ func buildNullBitmapBytes(dataLength int, validityArray interface{}) []byte { return res } -// NewBufferFromInterfaces returns a new Buffer of type `typ` with the data represented as a slice of interface{}, with eventual nil values. +// NewBufferFromInterfaces returns a new typed Buffer with the data represented as a slice of interface{}, with eventual nil values. func NewBufferFromInterfaces(typ Type, data []interface{}) (Buffer, error) { buf := NewBuffer(len(data), typ) for i, c := range data { diff --git a/rolling/aggregation.go b/rolling/aggregation.go index cb483cc..376f5e4 100644 --- a/rolling/aggregation.go +++ b/rolling/aggregation.go @@ -162,7 +162,7 @@ func (r *intervalRolling) indexedAggregations(aggrs []ColAggregation) (int, []Co if newIntervalCol == -1 { return -1, nil, fmt.Errorf( - "must keep interval column '%s'", r.bow.ColumnName(r.colIndex)) + "must keep interval column '%s'", r.bow.ColumnName(r.intervalColIndex)) } return newIntervalCol, aggrs, nil @@ -184,7 +184,7 @@ func (r *intervalRolling) validateAggregation(aggr ColAggregation, newIndex int) r.options.Inclusive = true } - return readIndex == r.colIndex, nil + return readIndex == r.intervalColIndex, nil } func (r *intervalRolling) aggregateWindows(aggrs []ColAggregation) (bow.Bow, error) { @@ -194,7 +194,7 @@ func (r *intervalRolling) aggregateWindows(aggrs []ColAggregation) (bow.Bow, err rCopy := *r typ := aggr.GetReturnType( rCopy.bow.ColumnType(aggr.InputIndex()), - rCopy.bow.ColumnType(rCopy.colIndex)) + rCopy.bow.ColumnType(rCopy.intervalColIndex)) buf := bow.NewBuffer(rCopy.numWindows, typ) for rCopy.HasNext() { diff --git a/rolling/interpolation.go b/rolling/interpolation.go index 6ac0534..fc7dc8e 100644 --- a/rolling/interpolation.go +++ b/rolling/interpolation.go @@ -49,7 +49,7 @@ func (r *intervalRolling) Interpolate(interps ...ColInterpolation) Rolling { } if newIntervalCol == -1 { - return rCopy.setError(fmt.Errorf("must keep interval column '%s'", r.bow.ColumnName(r.colIndex))) + return rCopy.setError(fmt.Errorf("must keep interval column '%s'", r.bow.ColumnName(r.intervalColIndex))) } b, err := rCopy.interpolateWindows(interps) @@ -92,7 +92,7 @@ func (r *intervalRolling) validateInterpolation(interp *ColInterpolation, newInd interp.inputTypes, colType) } - return interp.colIndex == r.colIndex, nil + return interp.colIndex == r.intervalColIndex, nil } func (r *intervalRolling) interpolateWindows(interps []ColInterpolation) (bow.Bow, error) { @@ -118,7 +118,7 @@ func (r *intervalRolling) interpolateWindows(interps []ColInterpolation) (bow.Bo func (r *intervalRolling) interpolateWindow(interps []ColInterpolation, window *Window) (bow.Bow, error) { var firstColValue int64 = -1 if window.Bow.NumRows() > 0 { - firstColVal, i := window.Bow.GetNextFloat64(r.colIndex, 0) + firstColVal, i := window.Bow.GetNextFloat64(r.intervalColIndex, 0) if i > -1 { firstColValue = int64(firstColVal) } diff --git a/rolling/rolling.go b/rolling/rolling.go index fe60613..9eedf9b 100644 --- a/rolling/rolling.go +++ b/rolling/rolling.go @@ -30,16 +30,16 @@ type Rolling interface { type intervalRolling struct { // TODO: sync.Mutex - bow bow.Bow - colIndex int - interval int64 - options Options - numWindows int - - currWindowStart int64 - currRowIndex int - currWindowIndex int - err error + bow bow.Bow + intervalColIndex int + interval int64 + options Options + numWindows int + + currWindowFirstValue int64 + currRowIndex int + currWindowIndex int + err error } // Options sets options for IntervalRolling: @@ -66,10 +66,10 @@ func IntervalRolling(b bow.Bow, colName string, interval int64, options Options) return newIntervalRolling(b, colIndex, interval, options) } -func newIntervalRolling(b bow.Bow, colIndex int, interval int64, options Options) (Rolling, error) { - if b.ColumnType(colIndex) != bow.Int64 { +func newIntervalRolling(b bow.Bow, intervalColIndex int, interval int64, options Options) (Rolling, error) { + if b.ColumnType(intervalColIndex) != bow.Int64 { return nil, fmt.Errorf("impossible to create a new intervalRolling on column of type %v", - b.ColumnType(colIndex)) + b.ColumnType(intervalColIndex)) } var err error @@ -83,31 +83,31 @@ func newIntervalRolling(b bow.Bow, colIndex int, interval int64, options Options return nil, fmt.Errorf("enforcePrevRow: %w", err) } - var firstWindowStart int64 + var windowFirstValue int64 if b.NumRows() > 0 { - firstBowValue, valid := b.GetInt64(colIndex, 0) + firstBowValue, valid := b.GetInt64(intervalColIndex, 0) if !valid { return nil, fmt.Errorf( "the first value of the column should be convertible to int64, got %v", - b.GetValue(colIndex, 0)) + b.GetValue(intervalColIndex, 0)) } - // align first window start on interval - firstWindowStart = (firstBowValue/interval)*interval + options.Offset - if firstWindowStart > firstBowValue { - firstWindowStart -= interval + // align window first value on interval + windowFirstValue = (firstBowValue/interval)*interval + options.Offset + if windowFirstValue > firstBowValue { + windowFirstValue -= interval } } - numWindows := countWindows(b, colIndex, firstWindowStart, interval) + numWindows := countWindows(b, intervalColIndex, windowFirstValue, interval) return &intervalRolling{ - bow: b, - colIndex: colIndex, - interval: interval, - options: options, - numWindows: numWindows, - currWindowStart: firstWindowStart, + bow: b, + intervalColIndex: intervalColIndex, + interval: interval, + options: options, + numWindows: numWindows, + currWindowFirstValue: windowFirstValue, }, nil } @@ -164,12 +164,12 @@ func (r *intervalRolling) HasNext() bool { return false } - lastBowValue, lastBowValueIsValid := r.bow.GetInt64(r.colIndex, r.bow.NumRows()-1) + lastBowValue, lastBowValueIsValid := r.bow.GetInt64(r.intervalColIndex, r.bow.NumRows()-1) if !lastBowValueIsValid { return false } - return r.currWindowStart <= lastBowValue + return r.currWindowFirstValue <= lastBowValue } // TODO: concurrent-safe @@ -179,26 +179,26 @@ func (r *intervalRolling) Next() (windowIndex int, window *Window, err error) { return r.currWindowIndex, nil, nil } - windowStart := r.currWindowStart - windowEnd := r.currWindowStart + r.interval // include last position even if last point is excluded + firstValue := r.currWindowFirstValue + lastValue := r.currWindowFirstValue + r.interval // include last position even if last point is excluded rowIndex := 0 isInclusive := false firstRowIndex := r.currRowIndex lastRowIndex := -1 for rowIndex = firstRowIndex; rowIndex < r.bow.NumRows(); rowIndex++ { - ref, ok := r.bow.GetInt64(r.colIndex, rowIndex) + val, ok := r.bow.GetInt64(r.intervalColIndex, rowIndex) if !ok { continue } - if ref < windowStart { + if val < firstValue { continue } - if ref > windowEnd { + if val > lastValue { break } - if ref == windowEnd { + if val == lastValue { if isInclusive { break } @@ -217,7 +217,7 @@ func (r *intervalRolling) Next() (windowIndex int, window *Window, err error) { r.currRowIndex = rowIndex - 1 } - r.currWindowStart = windowEnd + r.currWindowFirstValue = lastValue windowIndex = r.currWindowIndex r.currWindowIndex++ @@ -231,9 +231,9 @@ func (r *intervalRolling) Next() (windowIndex int, window *Window, err error) { return windowIndex, &Window{ Bow: b, FirstIndex: firstRowIndex, - IntervalColIndex: r.colIndex, - FirstValue: windowStart, - LastValue: windowEnd, + IntervalColIndex: r.intervalColIndex, + FirstValue: firstValue, + LastValue: lastValue, IsInclusive: isInclusive, }, nil } From 27f022aaaf864d2776bfbcb9c3703d944c3aeaeb Mon Sep 17 00:00:00 2001 From: agelloz Date: Mon, 25 Apr 2022 17:33:20 +0200 Subject: [PATCH 05/29] first --- CHANGELOG.md | 9 ++++ Makefile | 8 ++-- arrowtests/arrow.go | 12 ++--- arrowtests/arrow_test.go | 6 ++- bow.go | 4 +- bowappend.gen.go | 6 +-- bowappend.gen.go.tmpl | 6 +-- bowassertion.go | 2 +- bowbuffer.gen.go | 4 +- bowbuffer.gen.go.tmpl | 4 +- bowbuffer.go | 2 +- bowfill.go | 2 +- bowgetters.go | 4 +- bowjoin.gen.go | 2 +- bowjoin.gen.go.tmpl | 2 +- bowmetadata.go | 4 +- bowparquet_test.go | 2 +- bowrecord.go | 4 +- bowseries.gen.go | 8 ++-- bowseries.gen.go.tmpl | 8 ++-- bowseries.go | 4 +- bowtypes.go | 2 +- bowvalues.go | 2 +- go.mod | 31 +++++++----- go.sum | 94 ++++++++++++++++++++++++++++++------- scripts/count-code-lines.sh | 11 ----- 26 files changed, 153 insertions(+), 90 deletions(-) delete mode 100755 scripts/count-code-lines.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cf4c17..a5df524 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +UNRELEASED [XXXX-XX-XX] +------------------- + +- General + - bump to go 1.18 + - bump to arrow/go/v8 + - remove useless count script + - add gci linter + v0.18.0 [2022-02-16] ------------------- diff --git a/Makefile b/Makefile index 8113222..627d615 100644 --- a/Makefile +++ b/Makefile @@ -1,19 +1,17 @@ #user overridable variables -all: lint count test +all: lint test install: @go install golang.org/x/perf/cmd/benchstat@latest @go install github.com/jstemmer/go-junit-report@latest @go install github.com/Metronlab/genius@latest + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sudo sh -s -- -b $(go env GOPATH)/bin v1.45.2 gen: @go generate $(PKG) lint: - golangci-lint run -E gofmt --fix -v $(PKG) - -count: - @bash -c $(PWD)/scripts/count-code-lines.sh + golangci-lint run -E gofmt,gci --fix -v $(PKG) test: @RUN=$(RUN) PKG=$(PKG) TIMEOUT=$(TIMEOUT) bash -c $(PWD)/scripts/test.sh diff --git a/arrowtests/arrow.go b/arrowtests/arrow.go index 8723d46..e7a89a9 100644 --- a/arrowtests/arrow.go +++ b/arrowtests/arrow.go @@ -3,9 +3,9 @@ package arrowtests import ( "fmt" - "github.com/apache/arrow/go/arrow" - "github.com/apache/arrow/go/arrow/array" - "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/memory" ) var ( @@ -25,7 +25,7 @@ type Event struct { } //NewTSRecord Create a new sample base on eventSchema -func NewTSRecord() (*arrow.Schema, array.Record) { +func NewTSRecord() (*arrow.Schema, arrow.Record) { pool := memory.NewGoAllocator() b := array.NewRecordBuilder(pool, EventSchema) defer b.Release() @@ -38,14 +38,14 @@ func NewTSRecord() (*arrow.Schema, array.Record) { } //PrintRecordColumns Print a columns based output -func PrintRecordColumns(rec array.Record) { +func PrintRecordColumns(rec arrow.Record) { for i, col := range rec.Columns() { fmt.Printf("column[%d] %q: %v\n", i, rec.ColumnName(i), col) } } //PrintRecordRows Print a row based output -func PrintRecordRows(schema *arrow.Schema, recs []array.Record) { +func PrintRecordRows(schema *arrow.Schema, recs []arrow.Record) { // Make a table read only based on many records table := array.NewTableFromRecords(schema, recs) defer table.Release() diff --git a/arrowtests/arrow_test.go b/arrowtests/arrow_test.go index c77a082..8027276 100644 --- a/arrowtests/arrow_test.go +++ b/arrowtests/arrow_test.go @@ -1,6 +1,8 @@ package arrowtests -import "github.com/apache/arrow/go/arrow/array" +import ( + "github.com/apache/arrow/go/v8/arrow" +) func ExamplePrintRecordColumns() { _, rec := NewTSRecord() @@ -18,7 +20,7 @@ func ExamplePrintRecordRows() { s, rec := NewTSRecord() defer rec.Release() - PrintRecordRows(s, []array.Record{rec}) + PrintRecordRows(s, []arrow.Record{rec}) // Output: //time: 1 , value: 7 , quality: 42 diff --git a/bow.go b/bow.go index f9e755f..c3fe159 100644 --- a/bow.go +++ b/bow.go @@ -6,8 +6,8 @@ import ( "reflect" "time" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" ) // Bow is a wrapper of Apache Arrow array.Record interface. diff --git a/bowappend.gen.go b/bowappend.gen.go index 01280eb..94d183a 100644 --- a/bowappend.gen.go +++ b/bowappend.gen.go @@ -4,9 +4,9 @@ package bow import ( "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/memory" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/memory" ) // AppendBows attempts to append bows with equal schemas. diff --git a/bowappend.gen.go.tmpl b/bowappend.gen.go.tmpl index 15ef2eb..5e60c76 100644 --- a/bowappend.gen.go.tmpl +++ b/bowappend.gen.go.tmpl @@ -2,9 +2,9 @@ package bow import ( "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/memory" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/memory" ) // AppendBows attempts to append bows with equal schemas. diff --git a/bowassertion.go b/bowassertion.go index ac952e1..2bcc297 100644 --- a/bowassertion.go +++ b/bowassertion.go @@ -1,7 +1,7 @@ package bow import ( - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow/array" ) const ( diff --git a/bowbuffer.gen.go b/bowbuffer.gen.go index 3e59ee3..90699a4 100644 --- a/bowbuffer.gen.go +++ b/bowbuffer.gen.go @@ -5,8 +5,8 @@ package bow import ( "fmt" - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/bitutil" + "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/bitutil" ) func NewBuffer(size int, typ Type) Buffer { diff --git a/bowbuffer.gen.go.tmpl b/bowbuffer.gen.go.tmpl index 358bd20..856ced6 100644 --- a/bowbuffer.gen.go.tmpl +++ b/bowbuffer.gen.go.tmpl @@ -3,8 +3,8 @@ package bow import ( "fmt" - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/bitutil" + "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/bitutil" ) func NewBuffer(size int, typ Type) Buffer { diff --git a/bowbuffer.go b/bowbuffer.go index 26dd72a..dbe3811 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -4,7 +4,7 @@ import ( "fmt" "sort" - "github.com/apache/arrow/go/v7/arrow/bitutil" + "github.com/apache/arrow/go/v8/arrow/bitutil" ) type Buffer struct { diff --git a/bowfill.go b/bowfill.go index 1bc51e7..9004a5a 100644 --- a/bowfill.go +++ b/bowfill.go @@ -5,7 +5,7 @@ import ( "math" "sync" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow/array" ) // FillLinear fills the column toFillColIndex using the Linear interpolation method according diff --git a/bowgetters.go b/bowgetters.go index 01fe9f3..bb4f065 100644 --- a/bowgetters.go +++ b/bowgetters.go @@ -4,8 +4,8 @@ import ( "fmt" "sort" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" ) func (b *bow) GetRow(rowIndex int) map[string]interface{} { diff --git a/bowjoin.gen.go b/bowjoin.gen.go index 418e8cf..81f82b9 100644 --- a/bowjoin.gen.go +++ b/bowjoin.gen.go @@ -5,7 +5,7 @@ package bow import ( "fmt" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow/array" ) func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, diff --git a/bowjoin.gen.go.tmpl b/bowjoin.gen.go.tmpl index 6aef1cd..fe60161 100644 --- a/bowjoin.gen.go.tmpl +++ b/bowjoin.gen.go.tmpl @@ -3,7 +3,7 @@ package bow import ( "fmt" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow/array" ) func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, diff --git a/bowmetadata.go b/bowmetadata.go index ac0b18e..28cb52d 100644 --- a/bowmetadata.go +++ b/bowmetadata.go @@ -3,8 +3,8 @@ package bow import ( "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" ) // Metadata is an arrow metadata wrapping diff --git a/bowparquet_test.go b/bowparquet_test.go index c4267f6..d8bc1c2 100644 --- a/bowparquet_test.go +++ b/bowparquet_test.go @@ -7,7 +7,7 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v7/arrow" + "github.com/apache/arrow/go/v8/arrow" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/xitongsys/parquet-go/parquet" diff --git a/bowrecord.go b/bowrecord.go index 188a8ec..a58e94e 100644 --- a/bowrecord.go +++ b/bowrecord.go @@ -4,8 +4,8 @@ import ( "errors" "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" ) func NewBowFromRecord(record arrow.Record) (Bow, error) { diff --git a/bowseries.gen.go b/bowseries.gen.go index f670ec0..b7f1b82 100644 --- a/bowseries.gen.go +++ b/bowseries.gen.go @@ -4,10 +4,10 @@ package bow import ( "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/bitutil" - "github.com/apache/arrow/go/v7/arrow/memory" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/bitutil" + "github.com/apache/arrow/go/v8/arrow/memory" ) func NewSeries(name string, dataArray interface{}, validityArray interface{}) Series { diff --git a/bowseries.gen.go.tmpl b/bowseries.gen.go.tmpl index c6496b3..409f5d5 100644 --- a/bowseries.gen.go.tmpl +++ b/bowseries.gen.go.tmpl @@ -2,10 +2,10 @@ package bow import ( "fmt" - "github.com/apache/arrow/go/v7/arrow" - "github.com/apache/arrow/go/v7/arrow/array" - "github.com/apache/arrow/go/v7/arrow/bitutil" - "github.com/apache/arrow/go/v7/arrow/memory" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/bitutil" + "github.com/apache/arrow/go/v8/arrow/memory" ) func NewSeries(name string, dataArray interface{}, validityArray interface{}) Series { diff --git a/bowseries.go b/bowseries.go index c40f89d..ebecb3b 100644 --- a/bowseries.go +++ b/bowseries.go @@ -4,8 +4,8 @@ import ( "encoding/json" "fmt" - "github.com/apache/arrow/go/arrow/bitutil" - "github.com/apache/arrow/go/v7/arrow" + "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/bitutil" ) // A Series is simply a named Apache Arrow array.Interface, which is immutable diff --git a/bowtypes.go b/bowtypes.go index bfb0362..9772d9d 100644 --- a/bowtypes.go +++ b/bowtypes.go @@ -1,7 +1,7 @@ package bow import ( - "github.com/apache/arrow/go/v7/arrow" + "github.com/apache/arrow/go/v8/arrow" ) type Type int diff --git a/bowvalues.go b/bowvalues.go index e75edd9..baf7a4d 100644 --- a/bowvalues.go +++ b/bowvalues.go @@ -1,6 +1,6 @@ package bow -import "github.com/apache/arrow/go/v7/arrow/array" +import "github.com/apache/arrow/go/v8/arrow/array" func Int64Values(arr *array.Int64) []int64 { return arr.Int64Values() diff --git a/go.mod b/go.mod index 023343d..d113fcf 100644 --- a/go.mod +++ b/go.mod @@ -1,28 +1,33 @@ module github.com/metronlab/bow -go 1.17 +go 1.18 require ( - github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 - github.com/apache/arrow/go/v7 v7.0.0-20220201131309-56e270fda7f5 - github.com/apache/thrift v0.15.0 // indirect + github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect + github.com/apache/arrow/go/v8 v8.0.0-20220425143814-555b4d27192e + github.com/apache/thrift v0.16.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/google/uuid v1.3.0 github.com/pmezard/go-difflib v1.0.0 // indirect github.com/stretchr/testify v1.7.0 github.com/xitongsys/parquet-go v1.6.2 - github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0 - golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect + github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c + golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f // indirect gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect ) require ( - github.com/goccy/go-json v0.9.4 // indirect + github.com/andybalholm/brotli v1.0.4 // indirect + github.com/goccy/go-json v0.9.7 // indirect github.com/golang/snappy v0.0.4 // indirect - github.com/klauspost/compress v1.14.2 // indirect - github.com/pierrec/lz4/v4 v4.1.12 // indirect - golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57 // indirect - golang.org/x/sys v0.0.0-20220209214540-3681064d5158 // indirect - golang.org/x/tools v0.1.9 // indirect - gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect + github.com/klauspost/asmfmt v1.3.1 // indirect + github.com/klauspost/compress v1.15.1 // indirect + github.com/klauspost/cpuid/v2 v2.0.9 // indirect + github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect + github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect + github.com/pierrec/lz4/v4 v4.1.14 // indirect + github.com/zeebo/xxh3 v1.0.1 // indirect + golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // indirect + golang.org/x/sys v0.0.0-20220422013727-9388b58f7150 // indirect + golang.org/x/tools v0.1.10 // indirect ) diff --git a/go.sum b/go.sum index 6f594b7..edaf32f 100644 --- a/go.sum +++ b/go.sum @@ -22,8 +22,17 @@ cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohl dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= dmitri.shuralyov.com/gpu/mtl v0.0.0-20201218220906-28db891af037/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= +github.com/Azure/azure-pipeline-go v0.2.3/go.mod h1:x841ezTBIMG6O3lAcl8ATHnsOPVl2bqk7S3ta6S6u4k= +github.com/Azure/azure-storage-blob-go v0.14.0/go.mod h1:SMqIBi+SuiQH32bvyjngEewEeXoPfKMgWlBDaYf6fck= +github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= +github.com/Azure/go-autorest/autorest/adal v0.9.13/go.mod h1:W/MM4U6nLxnIskrw4UwWzlHfGjwUS50aOsc/I3yuU8M= +github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74= +github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= +github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= +github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= @@ -36,18 +45,21 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 h1:byKBBF2CKWBjjA4J1ZL2JXttJULvWSl50LegTyRZ728= github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516/go.mod h1:QNYViu/X0HXDHw7m3KXzWSVXIbfUvJqBFe6Gj8/pYA0= -github.com/apache/arrow/go/v7 v7.0.0-20220201131309-56e270fda7f5 h1:YFoQL6S5yYC6uHoScA5xUlRANdTAlHWT3YTL+Yfg/GY= -github.com/apache/arrow/go/v7 v7.0.0-20220201131309-56e270fda7f5/go.mod h1:EMQu9P1iWg58RroDhuW458NFFdNG89lIPHsSsAUTg90= +github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 h1:q4dksr6ICHXqG5hm0ZW5IHyeEJXoIJSOZeBLmWPNeIQ= +github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40/go.mod h1:Q7yQnSMnLvcXlZ8RV+jwz/6y1rQTqbX6C82SndT52Zs= +github.com/apache/arrow/go/v8 v8.0.0-20220425143814-555b4d27192e h1:8tzo8QHYmveHydZDhaWswUUbgELAfZoKEv9c3+iFZls= +github.com/apache/arrow/go/v8 v8.0.0-20220425143814-555b4d27192e/go.mod h1:63co72EKYQT9WKr8Y1Yconk4dysC0t79wNDauYO1ZGg= github.com/apache/thrift v0.0.0-20181112125854-24918abba929/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.14.2/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/apache/thrift v0.15.0 h1:aGvdaR0v1t9XLgjtBYwxcBvBOTMqClzwE26CHOgjW1Y= github.com/apache/thrift v0.15.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= +github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY= +github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= @@ -56,6 +68,19 @@ github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQ github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.30.19/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= +github.com/aws/aws-sdk-go-v2 v1.7.1/go.mod h1:L5LuPC1ZgDr2xQS7AmIec/Jlc7O/Y1u2KxJyNVab250= +github.com/aws/aws-sdk-go-v2/config v1.5.0/go.mod h1:RWlPOAW3E3tbtNAqTwvSW54Of/yP3oiZXMI0xfUdjyA= +github.com/aws/aws-sdk-go-v2/credentials v1.3.1/go.mod h1:r0n73xwsIVagq8RsxmZbGSRQFj9As3je72C2WzUIToc= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.3.0/go.mod h1:2LAuqPx1I6jNfaGDucWfA2zqQCYCOMCDHiCOciALyNw= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.3.2/go.mod h1:qaqQiHSrOUVOfKe6fhgQ6UzhxjwqVW8aHNegd6Ws4w4= +github.com/aws/aws-sdk-go-v2/internal/ini v1.1.1/go.mod h1:Zy8smImhTdOETZqfyn01iNOe0CNggVbPjCajyaz6Gvg= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.2.1/go.mod h1:v33JQ57i2nekYTA70Mb+O18KeH4KqhdqxTJZNK1zdRE= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.2.1/go.mod h1:zceowr5Z1Nh2WVP8bf/3ikB41IZW59E4yIYbg+pC6mw= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.5.1/go.mod h1:6EQZIwNNvHpq/2/QSJnp4+ECvqIy55w95Ofs0ze+nGQ= +github.com/aws/aws-sdk-go-v2/service/s3 v1.11.1/go.mod h1:XLAGFrEjbvMCLvAtWLLP32yTv8GpBquCApZEycDLunI= +github.com/aws/aws-sdk-go-v2/service/sso v1.3.1/go.mod h1:J3A3RGUvuCZjvSuZEcOpHDnzZP/sKbhDWV2T1EOzFIM= +github.com/aws/aws-sdk-go-v2/service/sts v1.6.0/go.mod h1:q7o0j7d7HrJk/vr9uUt3BVRASvcU7gYZB9PUgPiByXg= +github.com/aws/smithy-go v1.6.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= @@ -102,12 +127,14 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= +github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= +github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= @@ -130,8 +157,9 @@ github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTg github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/goccy/go-json v0.9.4 h1:L8MLKG2mvVXiQu07qB6hmfqeSYQdOnqPot2GhsIwIaI= -github.com/goccy/go-json v0.9.4/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-json v0.9.6/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-json v0.9.7 h1:IcB+Aqpx/iMHu5Yooh7jEzJk1JZ7Pjtmys2ukPr7EeM= +github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -170,6 +198,7 @@ github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/flatbuffers v1.11.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/flatbuffers v2.0.0+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/flatbuffers v2.0.5+incompatible h1:ANsW0idDAXIY+mNHzIHxWRfabV2x5LUEEIIWcwsYgB8= github.com/google/flatbuffers v2.0.5+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -177,7 +206,9 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= @@ -188,6 +219,7 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= @@ -230,6 +262,8 @@ github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod github.com/jcmturner/gofork v0.0.0-20180107083740-2aebee971930/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= @@ -242,11 +276,14 @@ github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+ github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/asmfmt v1.3.1 h1:7xZi1N7s9gTLbqiM8KUv8TLyysavbTRGBT5/ly0bRtw= github.com/klauspost/asmfmt v1.3.1/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= -github.com/klauspost/compress v1.14.2 h1:S0OHlFk/Gbon/yauFJ4FfJJF5V0fc5HbBTJazi28pRw= github.com/klauspost/compress v1.14.2/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/compress v1.15.1 h1:y9FcTHGyrebwfP0ZZqFiaxTaiDnUrGkJkI+f583BL1A= +github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= @@ -260,12 +297,15 @@ github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-b github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-ieproxy v0.0.1/go.mod h1:pYabZ6IHcRpFh7vIaLfK7rdcWgFEb3SFJ6/gNWuh88E= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= @@ -286,6 +326,7 @@ github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzE github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= +github.com/ncw/swift v1.0.52/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM= github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= @@ -309,11 +350,11 @@ github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9 github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= -github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pierrec/lz4/v4 v4.1.12 h1:44l88ehTZAUGW4VlO1QC4zkilL99M6Y9MXNwEs0uzP8= github.com/pierrec/lz4/v4 v4.1.12/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.14 h1:+fL8AQEZtz/ijeNnpduH0bROTu0O3NZAlPjQxGn8LwE= +github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -380,10 +421,13 @@ github.com/xitongsys/parquet-go v1.5.1/go.mod h1:xUxwM8ELydxh4edHGegYq1pA8NnMKDx github.com/xitongsys/parquet-go v1.6.2 h1:MhCaXii4eqceKPu9BwrjLqyK10oX9WF+xGhwvwbw7xM= github.com/xitongsys/parquet-go v1.6.2/go.mod h1:IulAQyalCm0rPiZVNnCgm/PCL64X2tdSVGMQ/UeKqWA= github.com/xitongsys/parquet-go-source v0.0.0-20190524061010-2b72cbee77d5/go.mod h1:xxCx7Wpym/3QCo6JhujJX51dzSXrwmb0oH6FQb39SEA= -github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0 h1:a742S4V5A15F93smuVxA60LQWsrCnN8bKeWDBARU1/k= github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0/go.mod h1:HYhIKsdns7xz80OgkbgJYrtQY7FjHWHKH6cvN7+czGE= +github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c h1:UDtocVeACpnwauljUbeHD9UOjjcvF5kLUHruww7VT9A= +github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c/go.mod h1:qLb2Itmdcp7KPa5KZKvhE9U1q5bYSOmgeOckF/H2rQA= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/zeebo/xxh3 v1.0.1 h1:FMSRIbkrLikb/0hZxmltpg84VkqDAT5M8ufXynuhXsI= github.com/zeebo/xxh3 v1.0.1/go.mod h1:8VHV24/3AZLn3b6Mlp/KuC33LWH687Wq6EnziEB+rsA= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= @@ -420,6 +464,8 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -456,6 +502,7 @@ golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHl golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mobile v0.0.0-20201217150744-e6ae53a27f4f/go.mod h1:skQtrUTUwhdJvXM/2KKJzY8pDgNr9I/FOMqDVRPBUS4= @@ -466,9 +513,11 @@ golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.1.1-0.20191209134235-331c550502dd/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= -golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57 h1:LQmS1nU0twXLA96Kt7U9qtHJEbBk3z6Q0V4UXjZkpr4= golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= +golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 h1:kQgndtyPBW/JIYERgdxfwMYh3AVStj88WQTlNDi2a+o= +golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -488,6 +537,7 @@ golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191112182307-2180aed22343/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -496,6 +546,7 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -531,6 +582,7 @@ golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191112214154-59a1497f0cea/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -540,6 +592,7 @@ golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200828194041-157a740278f4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -548,11 +601,12 @@ golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220209214540-3681064d5158 h1:rm+CHSpPEEW2IsXUib1ThaHIjuBVZjxNgSKmBLFfD4c= -golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220422013727-9388b58f7150 h1:xHms4gcpe1YE7A3yIllJXP16CMAGuqwO2lX1mTyyRRc= +golang.org/x/sys v0.0.0-20220422013727-9388b58f7150/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -607,14 +661,16 @@ golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapK golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= +golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.8-0.20211029000441-d6a9af8af023/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= -golang.org/x/tools v0.1.9 h1:j9KsMiaP1c3B0OTQGth0/k+miLGTgLsAFUCrF2vLcF8= -golang.org/x/tools v0.1.9/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= +golang.org/x/tools v0.1.10 h1:QjFRCZxdOhBJ/UNgnBZLbNV13DlbnK0quyivTnXJM20= +golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f h1:GGU+dLjvlC3qDwqYgL6UgRmHXhOOgns0bZu2Ty5mm6U= +golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= gonum.org/v1/gonum v0.9.3 h1:DnoIG+QAMaF5NvxnGe/oKsgKcAc6PcUyl8q0VetfQ8s= @@ -658,6 +714,7 @@ google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfG google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79/go.mod h1:yiaVoXHpRzHGyxV3o4DktVWY4mSUErTKaeEOq6C3t3U= google.golang.org/genproto v0.0.0-20220126215142-9970aeb2e350/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= @@ -674,6 +731,8 @@ google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8 google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= +google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.44.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= @@ -709,6 +768,7 @@ gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bl gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/scripts/count-code-lines.sh b/scripts/count-code-lines.sh deleted file mode 100755 index 4da9ec2..0000000 --- a/scripts/count-code-lines.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/zsh - -echo nb of code lines without tests: -cat $(ls **/*.go | grep -v vendor/ | grep -vE '^.*_test.go$') | wc -l -echo nb of code lines: -cat $(ls **/*.go | grep -v vendor/) | wc -l - -(for file in $(ls **/*.go | grep -v vendor/ ) - do - git blame ${file} -done) | cut -f2 -d\( | cut -f1 -d\ | grep -vE '.*.go' | sort | uniq -c From c43a4d363cf38729c2e27a466956c5a9740b56b0 Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 28 Apr 2022 10:49:10 +0200 Subject: [PATCH 06/29] clean up --- CHANGELOG.md | 4 + XXXexamples_test.go | 23 +++++- bowappend.go | 33 ++++++--- bowassertion.go | 1 + bowbuffer.go | 12 +-- bowconvert.go | 132 ++++++++++++++++++++++++++++++++- bowconvert_test.go | 174 ++++++++++++++++++++++++++++++++++++++------ bowfill.go | 4 +- bowgetters.go | 14 +++- bowgetters_test.go | 2 + bowjoin.go | 110 +++++++++++++++++++++++----- bowjoin_test.go | 2 + bowjson.go | 8 +- bowparquet.go | 8 +- bowparquet_test.go | 47 ++++++++---- bowrecord.go | 4 +- bowseries.go | 17 +++-- bowseries_test.go | 1 - bowsort.go | 1 - bowsort_test.go | 24 ++++++ bowstring.go | 34 +++++---- bowtypes.go | 52 ++++++------- bowvalues.go | 10 +-- 23 files changed, 572 insertions(+), 145 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5df524..37f7297 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ UNRELEASED [XXXX-XX-XX] - bump to arrow/go/v8 - remove useless count script - add gci linter + - add support for Arrow Timestamps types + - prepare for the use of new go 1.18 generics by removing genius file generation +- Parquet + - use the new arrow/go/v8/parquet package v0.18.0 [2022-02-16] ------------------- diff --git a/XXXexamples_test.go b/XXXexamples_test.go index 80d8f28..4886511 100644 --- a/XXXexamples_test.go +++ b/XXXexamples_test.go @@ -23,7 +23,6 @@ func ExampleNewBow() { // 2 // 3 3.3 true // 4 4 false - // metadata: [] } func ExampleNewBowFromColBasedInterfaces() { @@ -47,7 +46,27 @@ func ExampleNewBowFromColBasedInterfaces() { // 1 1 1.1 // 1 2 // 3 3 1.3 - // metadata: [] +} + +func ExampleNewBowFromRowBasedInterfaces() { + b, err := NewBowFromRowBasedInterfaces( + []string{"time", "int", "float"}, + []Type{TimestampMilli, Int64, Float64}, + [][]interface{}{ + {"2022-04-27T00:00:00Z", 1, 1.1}, + {"2022-04-27T01:00:00Z", 2, 2.2}, + {"2022-04-27T02:00:00Z", 3, 3.3}, + }) + if err != nil { + panic(err) + } + + fmt.Println(b) + // Output: + // time:timestamp[ms, tz=UTC] int:int64 float:float64 + // 2022-04-27T00:00:00Z 1 1.1 + // 2022-04-27T01:00:00Z 2 2.2 + // 2022-04-27T02:00:00Z 3 3.3 } func ExampleBow_MarshalJSON() { diff --git a/bowappend.go b/bowappend.go index ffbc81a..9560565 100644 --- a/bowappend.go +++ b/bowappend.go @@ -39,11 +39,11 @@ func AppendBows(bows ...Bow) (Bow, error) { for _, b := range bows { if colType := b.ColumnType(colIndex); colType != refType { return nil, fmt.Errorf( - "bow.AppendBows: incompatible types %v and %v", refType, colType) + "incompatible types '%s' and '%s'", refType, colType) } data := b.(*bow).Column(colIndex).Data() arr := array.NewInt64Data(data) - v := Int64Values(arr) + v := int64Values(arr) valid := getValiditySlice(arr) builder.AppendValues(v, valid) } @@ -54,11 +54,11 @@ func AppendBows(bows ...Bow) (Bow, error) { for _, b := range bows { if colType := b.ColumnType(colIndex); colType != refType { return nil, fmt.Errorf( - "bow.AppendBows: incompatible types %v and %v", refType, colType) + "incompatible types '%s' and '%s'", refType, colType) } data := b.(*bow).Column(colIndex).Data() arr := array.NewFloat64Data(data) - v := Float64Values(arr) + v := float64Values(arr) valid := getValiditySlice(arr) builder.AppendValues(v, valid) } @@ -69,11 +69,11 @@ func AppendBows(bows ...Bow) (Bow, error) { for _, b := range bows { if colType := b.ColumnType(colIndex); colType != refType { return nil, fmt.Errorf( - "bow.AppendBows: incompatible types %v and %v", refType, colType) + "incompatible types '%s' and '%s'", refType, colType) } data := b.(*bow).Column(colIndex).Data() arr := array.NewBooleanData(data) - v := BooleanValues(arr) + v := booleanValues(arr) valid := getValiditySlice(arr) builder.AppendValues(v, valid) } @@ -84,17 +84,32 @@ func AppendBows(bows ...Bow) (Bow, error) { for _, b := range bows { if colType := b.ColumnType(colIndex); colType != refType { return nil, fmt.Errorf( - "bow.AppendBows: incompatible types %v and %v", refType, colType) + "incompatible types '%s' and '%s'", refType, colType) } data := b.(*bow).Column(colIndex).Data() arr := array.NewStringData(data) - v := StringValues(arr) + v := stringValues(arr) + valid := getValiditySlice(arr) + builder.AppendValues(v, valid) + } + newArray = builder.NewArray() + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: + builder := array.NewTimestampBuilder(mem, mapBowToArrowDataTypes[refType].(*arrow.TimestampType)) + builder.Resize(numRows) + for _, b := range bows { + if colType := b.ColumnType(colIndex); colType != refType { + return nil, fmt.Errorf( + "incompatible types '%s' and '%s'", refType, colType) + } + data := b.(*bow).Column(colIndex).Data() + arr := array.NewTimestampData(data) + v := timestampValues(arr) valid := getValiditySlice(arr) builder.AppendValues(v, valid) } newArray = builder.NewArray() default: - return nil, fmt.Errorf("unsupported type %v", refType) + return nil, fmt.Errorf("unsupported type '%s'", refType) } seriesSlice[colIndex] = Series{ diff --git a/bowassertion.go b/bowassertion.go index 2bcc297..a3c39eb 100644 --- a/bowassertion.go +++ b/bowassertion.go @@ -12,6 +12,7 @@ const ( // IsColSorted returns a boolean whether the column colIndex is sorted or not, skipping nil values. // An empty column or an unsupported data type returns false. +// Supports only Int64 and Float64. func (b *bow) IsColSorted(colIndex int) bool { if b.IsColEmpty(colIndex) { return false diff --git a/bowbuffer.go b/bowbuffer.go index 64b9c83..44dafa6 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -91,7 +91,7 @@ func (b *Buffer) SetOrDropStrict(i int, value interface{}) { case String: b.Data.([]string)[i], valid = value.(string) case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: - b.Data.([]arrow.Timestamp)[i], valid = b.DataType.Convert(value).(arrow.Timestamp) + b.Data.([]arrow.Timestamp)[i], valid = value.(arrow.Timestamp) default: panic(fmt.Errorf("unsupported type '%s'", b.DataType)) } @@ -150,35 +150,35 @@ func (b *bow) NewBufferFromCol(colIndex int) Buffer { nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) - res.Data = Int64Values(arr) + res.Data = int64Values(arr) res.nullBitmapBytes = nullBitmapBytesCopy case Float64: arr := array.NewFloat64Data(arrayData) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) - res.Data = Float64Values(arr) + res.Data = float64Values(arr) res.nullBitmapBytes = nullBitmapBytesCopy case Bool: arr := array.NewBooleanData(arrayData) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) - res.Data = BooleanValues(arr) + res.Data = booleanValues(arr) res.nullBitmapBytes = nullBitmapBytesCopy case String: arr := array.NewStringData(arrayData) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) - res.Data = StringValues(arr) + res.Data = stringValues(arr) res.nullBitmapBytes = nullBitmapBytesCopy case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: arr := array.NewTimestampData(arrayData) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) - res.Data = TimestampValues(arr) + res.Data = timestampValues(arr) res.nullBitmapBytes = nullBitmapBytesCopy default: panic(fmt.Errorf("unsupported type '%s'", b.ColumnType(colIndex))) diff --git a/bowconvert.go b/bowconvert.go index fe46065..b4c6a17 100644 --- a/bowconvert.go +++ b/bowconvert.go @@ -139,7 +139,10 @@ func ToString(i interface{}) (string, bool) { } } -func ToTimestamp(i interface{}) (arrow.Timestamp, bool) { +// ToTimestampSec returns an arrow.Timestamp value and a bool whether the conversion was successful or not. +// String values are first interpreted with strconv.ParseInt. +// If it fails, the values are parsed with arrow.TimestampFromString with the arrow.Second time unit. +func ToTimestampSec(i interface{}) (arrow.Timestamp, bool) { switch v := i.(type) { case json.Number: val, err := v.Int64() @@ -165,7 +168,134 @@ func ToTimestamp(i interface{}) (arrow.Timestamp, bool) { return 0, true case string: val, err := strconv.ParseInt(v, 10, 64) + if err == nil { + return arrow.Timestamp(val), true + } + ts, err := arrow.TimestampFromString(v, arrow.Second) + return ts, err == nil + case arrow.Timestamp: + return v, true + default: + return 0, false + } +} + +// ToTimestampMilli returns an arrow.Timestamp value and a bool whether the conversion was successful or not. +// String values are first interpreted with strconv.ParseInt. +// If it fails, the values are parsed with arrow.TimestampFromString with the arrow.Millisecond time unit. +func ToTimestampMilli(i interface{}) (arrow.Timestamp, bool) { + switch v := i.(type) { + case json.Number: + val, err := v.Int64() + return arrow.Timestamp(val), err == nil + case int: + return arrow.Timestamp(v), true + case int8: + return arrow.Timestamp(v), true + case int16: + return arrow.Timestamp(v), true + case int32: + return arrow.Timestamp(v), true + case int64: + return arrow.Timestamp(v), true + case float32: + return arrow.Timestamp(v), true + case float64: + return arrow.Timestamp(v), true + case bool: + if v { + return 1, true + } + return 0, true + case string: + val, err := strconv.ParseInt(v, 10, 64) + if err == nil { + return arrow.Timestamp(val), true + } + ts, err := arrow.TimestampFromString(v, arrow.Millisecond) + return ts, err == nil + case arrow.Timestamp: + return v, true + default: + return 0, false + } +} + +// ToTimestampMicro returns an arrow.Timestamp value and a bool whether the conversion was successful or not. +// String values are first interpreted with strconv.ParseInt. +// If it fails, the values are parsed with arrow.TimestampFromString with the arrow.Microsecond time unit. +func ToTimestampMicro(i interface{}) (arrow.Timestamp, bool) { + switch v := i.(type) { + case json.Number: + val, err := v.Int64() return arrow.Timestamp(val), err == nil + case int: + return arrow.Timestamp(v), true + case int8: + return arrow.Timestamp(v), true + case int16: + return arrow.Timestamp(v), true + case int32: + return arrow.Timestamp(v), true + case int64: + return arrow.Timestamp(v), true + case float32: + return arrow.Timestamp(v), true + case float64: + return arrow.Timestamp(v), true + case bool: + if v { + return 1, true + } + return 0, true + case string: + val, err := strconv.ParseInt(v, 10, 64) + if err == nil { + return arrow.Timestamp(val), true + } + ts, err := arrow.TimestampFromString(v, arrow.Microsecond) + return ts, err == nil + case arrow.Timestamp: + return v, true + default: + return 0, false + } +} + +// ToTimestampNano returns an arrow.Timestamp value and a bool whether the conversion was successful or not. +// String values are first interpreted with strconv.ParseInt. +// If it fails, the values are parsed with arrow.TimestampFromString with the arrow.Nanosecond time unit. +func ToTimestampNano(i interface{}) (arrow.Timestamp, bool) { + switch v := i.(type) { + case json.Number: + val, err := v.Int64() + return arrow.Timestamp(val), err == nil + case int: + return arrow.Timestamp(v), true + case int8: + return arrow.Timestamp(v), true + case int16: + return arrow.Timestamp(v), true + case int32: + return arrow.Timestamp(v), true + case int64: + return arrow.Timestamp(v), true + case float32: + return arrow.Timestamp(v), true + case float64: + return arrow.Timestamp(v), true + case bool: + if v { + return 1, true + } + return 0, true + case string: + val, err := strconv.ParseInt(v, 10, 64) + if err == nil { + return arrow.Timestamp(val), true + } + ts, err := arrow.TimestampFromString(v, arrow.Nanosecond) + return ts, err == nil case arrow.Timestamp: return v, true default: diff --git a/bowconvert_test.go b/bowconvert_test.go index 20fb346..2e53585 100644 --- a/bowconvert_test.go +++ b/bowconvert_test.go @@ -2,6 +2,7 @@ package bow import ( "testing" + "time" "github.com/apache/arrow/go/v8/arrow" "github.com/stretchr/testify/assert" @@ -127,26 +128,155 @@ func TestToString(t *testing.T) { } func TestToTimestamp(t *testing.T) { - var v arrow.Timestamp - var ok bool - - v, ok = ToTimestamp(true) - require.True(t, ok) - assert.Equal(t, arrow.Timestamp(1), v) - - v, ok = ToTimestamp(false) - require.True(t, ok) - assert.Equal(t, arrow.Timestamp(0), v) - - v, ok = ToTimestamp(0.) - require.True(t, ok) - assert.Equal(t, arrow.Timestamp(0), v) - - v, ok = ToTimestamp(0) - require.True(t, ok) - assert.Equal(t, arrow.Timestamp(0), v) - - v, ok = ToTimestamp("0") - require.True(t, ok) - assert.Equal(t, arrow.Timestamp(0), v) + t.Run("Sec", func(t *testing.T) { + v, ok := ToTimestampSec(true) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(1), v) + + v, ok = ToTimestampSec(false) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampSec(0.) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampSec(0) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampSec("0") + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + assert.Equal(t, "1970-01-01T00:00:00Z", + v.ToTime(arrow.Second).Format(time.RFC3339Nano)) + + formattedTimeSec := "2022-04-27T00:00:00Z" + ti, err := time.Parse(time.RFC3339, formattedTimeSec) + require.NoError(t, err) + + v, ok = ToTimestampSec(ti.Unix()) + require.True(t, ok) + assert.Equal(t, "2022-04-27T00:00:00Z", + v.ToTime(arrow.Second).Format(time.RFC3339Nano)) + + v, ok = ToTimestampSec(formattedTimeSec) + require.True(t, ok) + assert.Equal(t, "2022-04-27T00:00:00Z", + v.ToTime(arrow.Second).Format(time.RFC3339Nano)) + }) + + t.Run("Milli", func(t *testing.T) { + v, ok := ToTimestampMilli(true) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(1), v) + + v, ok = ToTimestampMilli(false) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampMilli(0.) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampMilli(0) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampMilli("0") + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + assert.Equal(t, "1970-01-01T00:00:00Z", + v.ToTime(arrow.Millisecond).Format(time.RFC3339Nano)) + + formattedTimeMilli := "2022-04-27T00:00:00.123Z" + ti, err := time.Parse(time.RFC3339, formattedTimeMilli) + require.NoError(t, err) + + v, ok = ToTimestampMilli(ti.UnixMilli()) + require.True(t, ok) + assert.Equal(t, "2022-04-27T00:00:00.123Z", + v.ToTime(arrow.Millisecond).Format(time.RFC3339Nano)) + + v, ok = ToTimestampMilli(formattedTimeMilli) + require.True(t, ok) + assert.Equal(t, "2022-04-27T00:00:00.123Z", + v.ToTime(arrow.Millisecond).Format(time.RFC3339Nano)) + }) + + t.Run("Micro", func(t *testing.T) { + v, ok := ToTimestampMicro(true) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(1), v) + + v, ok = ToTimestampMicro(false) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampMicro(0.) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampMicro(0) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampMicro("0") + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + assert.Equal(t, "1970-01-01T00:00:00Z", + v.ToTime(arrow.Microsecond).Format(time.RFC3339Nano)) + + formattedTimeMicro := "2022-04-27T00:00:00.123456Z" + ti, err := time.Parse(time.RFC3339, formattedTimeMicro) + require.NoError(t, err) + + v, ok = ToTimestampMicro(ti.UnixMicro()) + require.True(t, ok) + assert.Equal(t, "2022-04-27T00:00:00.123456Z", + v.ToTime(arrow.Microsecond).Format(time.RFC3339Nano)) + + v, ok = ToTimestampMicro(formattedTimeMicro) + require.True(t, ok) + assert.Equal(t, "2022-04-27T00:00:00.123456Z", + v.ToTime(arrow.Microsecond).Format(time.RFC3339Nano)) + }) + + t.Run("Nano", func(t *testing.T) { + v, ok := ToTimestampNano(true) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(1), v) + + v, ok = ToTimestampNano(false) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampNano(0.) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampNano(0) + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + + v, ok = ToTimestampNano("0") + require.True(t, ok) + assert.Equal(t, arrow.Timestamp(0), v) + assert.Equal(t, "1970-01-01T00:00:00Z", + v.ToTime(arrow.Nanosecond).Format(time.RFC3339Nano)) + + formattedTimeNano := "2022-04-27T00:00:00.123456789Z" + ti, err := time.Parse(time.RFC3339, formattedTimeNano) + require.NoError(t, err) + + v, ok = ToTimestampNano(ti.UnixNano()) + require.True(t, ok) + assert.Equal(t, "2022-04-27T00:00:00.123456789Z", + v.ToTime(arrow.Nanosecond).Format(time.RFC3339Nano)) + + v, ok = ToTimestampNano(formattedTimeNano) + require.True(t, ok) + assert.Equal(t, "2022-04-27T00:00:00.123456789Z", + v.ToTime(arrow.Nanosecond).Format(time.RFC3339Nano)) + }) } diff --git a/bowfill.go b/bowfill.go index fcb5d08..8ae2256 100644 --- a/bowfill.go +++ b/bowfill.go @@ -8,9 +8,11 @@ import ( "github.com/apache/arrow/go/v8/arrow/array" ) +// TODO: add support for timestamp types + // FillLinear fills the column toFillColIndex using the Linear interpolation method according // to the reference column refColIndex, which has to be sorted. -// Fills only int64 and float64 types. +// Fills only Int64 and Float64 types. func (b *bow) FillLinear(refColIndex, toFillColIndex int) (Bow, error) { if refColIndex < 0 || refColIndex > b.NumCols()-1 { return nil, fmt.Errorf("bow.FillLinear: refColIndex is out of range") diff --git a/bowgetters.go b/bowgetters.go index 1ada8ac..28bc4c8 100644 --- a/bowgetters.go +++ b/bowgetters.go @@ -143,9 +143,12 @@ func (b *bow) GetInt64(colIndex, rowIndex int) (int64, bool) { return ToInt64(vd.Value(rowIndex)) } return 0., false + case arrow.TIMESTAMP: + vd := array.NewTimestampData(b.Column(colIndex).Data()) + return int64(vd.Value(rowIndex)), vd.IsValid(rowIndex) default: - panic(fmt.Errorf("unsupported type '%s'", - b.Schema().Field(colIndex).Type.Name())) + panic(fmt.Errorf("unsupported arrow.DataType '%s'", + b.Schema().Field(colIndex).Type)) } } @@ -198,9 +201,12 @@ func (b *bow) GetFloat64(colIndex, rowIndex int) (float64, bool) { return ToFloat64(vd.Value(rowIndex)) } return 0., false + case arrow.TIMESTAMP: + vd := array.NewTimestampData(b.Column(colIndex).Data()) + return float64(vd.Value(rowIndex)), vd.IsValid(rowIndex) default: - panic(fmt.Sprintf("unsupported type '%s'", - b.Schema().Field(colIndex).Type.Name())) + panic(fmt.Sprintf("unsupported arrow.DataType '%s'", + b.Schema().Field(colIndex).Type)) } } diff --git a/bowgetters_test.go b/bowgetters_test.go index 5b5f2bf..156cc50 100644 --- a/bowgetters_test.go +++ b/bowgetters_test.go @@ -66,4 +66,6 @@ func TestBow_Distinct(t *testing.T) { ExpectEqual(t, expect, res) }) + + // TODO: add tests for timestamp types } diff --git a/bowjoin.go b/bowjoin.go index 6da6767..bb347f8 100644 --- a/bowjoin.go +++ b/bowjoin.go @@ -39,7 +39,7 @@ func (b *bow) InnerJoin(other Bow) Bow { newSeries := make([]Series, newNumCols) newNumRows := len(commonRows.l) - innerFillLeftBowCols(&newSeries, left, right, + innerFillLeftBowCols(&newSeries, left, newNumRows, commonRows) innerFillRightBowCols(&newSeries, left, right, newNumRows, newNumCols, commonCols, commonRows) @@ -185,12 +185,12 @@ func getCommonRows(left, right Bow, commonColBufs map[string][]Buffer) CommonRow return commonRows } -func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, +func innerFillLeftBowCols(newSeries *[]Series, left *bow, newNumRows int, commonRows struct{ l, r []int }) { for colIndex := 0; colIndex < left.NumCols(); colIndex++ { buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) - switch left.ColumnType(colIndex) { + switch buf.DataType { case Int64: data := array.NewInt64Data(left.Column(colIndex).Data()) for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { @@ -219,8 +219,15 @@ func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) } } + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: + data := array.NewTimestampData(left.Column(colIndex).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.l[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) + } + } default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) @@ -238,7 +245,7 @@ func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, ne } // Fill common rows from right bow - switch right.ColumnType(rightCol) { + switch buf.DataType { case Int64: data := array.NewInt64Data(right.Column(rightCol).Data()) for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { @@ -267,8 +274,15 @@ func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, ne buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) } } + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: + data := array.NewTimestampData(right.Column(rightCol).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.r[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) + } + } default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) @@ -286,7 +300,7 @@ func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uni buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) // Fill rows from left bow - switch left.ColumnType(colIndex) { + switch buf.DataType { case Int64: data := array.NewInt64Data(left.Column(colIndex).Data()) for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { @@ -375,8 +389,30 @@ func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uni break } } + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: + data := array.NewTimestampData(left.Column(colIndex).Data()) + for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + if leftRow++; leftRow >= left.NumRows() { + break + } + } default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } // Fill remaining rows from right bow if column is common @@ -416,7 +452,7 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, } buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) - switch right.ColumnType(rightCol) { + switch buf.DataType { case Int64: data := array.NewInt64Data(right.Column(rightCol).Data()) @@ -441,8 +477,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -478,8 +514,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -515,8 +551,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -552,8 +588,45 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { + isRowCommon = true + break + } + } + if !isRowCommon { + if data.IsValid(rightRow) { + buf.SetOrDropStrict(newRow, data.Value(rightRow)) + } + newRow++ + } + } + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: + data := array.NewTimestampData(right.Column(rightCol).Data()) + + // Fill common rows from right bow + for newRow := 0; newRow < newNumRows; newRow++ { + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(commonRows.r[commonRow]) { + buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + leftRow++ + } + + // Fill remaining rows from right bow + newRow := left.NumRows() + len(commonRows.r) - uniquesLeft + for rightRow := 0; rightRow < right.NumRows(); rightRow++ { + var isRowCommon bool + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -566,8 +639,9 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, } } default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } + (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) rightCol++ } diff --git a/bowjoin_test.go b/bowjoin_test.go index 0ee2e6d..504bab9 100644 --- a/bowjoin_test.go +++ b/bowjoin_test.go @@ -8,6 +8,8 @@ import ( "github.com/stretchr/testify/require" ) +// TODO: add test cases for timestamp types + func TestBow_OuterJoin(t *testing.T) { t.Run("two empty bows", func(t *testing.T) { b1 := NewBowEmpty() diff --git a/bowjson.go b/bowjson.go index d788288..9d1c71c 100644 --- a/bowjson.go +++ b/bowjson.go @@ -89,7 +89,13 @@ func (b *bow) NewValuesFromJSON(jsonB JSONBow) error { */ for fieldIndex, field := range jsonB.Schema.Fields { - if _, ok := mapArrowNameToBowTypes[field.Type]; ok { + ok := false + for _, arrowType := range mapBowToArrowDataTypes { + if arrowType.Name() == field.Type { + ok = true + } + } + if ok { continue } switch field.Type { diff --git a/bowparquet.go b/bowparquet.go index f59edf2..ee4c519 100644 --- a/bowparquet.go +++ b/bowparquet.go @@ -37,15 +37,12 @@ func NewBowFromParquet(filename string, verbose bool) (Bow, error) { } defer tbl.Release() - //fmt.Printf("SCHEMA:%s\n", tbl.Schema().String()) - //fmt.Printf("COLS:%d\n", tbl.NumCols()) - cols := make([]arrow.Array, tbl.NumCols()) for i := 0; i < int(tbl.NumCols()); i++ { if len(tbl.Column(i).Data().Chunks()) != 1 { - return nil, fmt.Errorf("column %d has %d chunks", i, len(tbl.Column(i).Data().Chunks())) + return nil, fmt.Errorf( + "column %d has %d chunks", i, len(tbl.Column(i).Data().Chunks())) } - //fmt.Printf("FIELD %d\n%+v\n", i, tbl.Schema().Field(i)) cols[i] = tbl.Column(i).Data().Chunk(0) } @@ -97,7 +94,6 @@ func (b *bow) WriteParquet(filename string, verbose bool) error { "bow.WriteParquet: %s successfully written: %d rows\n", filename, b.NumRows()) } - fmt.Printf("%s\n", b.Schema()) return nil } diff --git a/bowparquet_test.go b/bowparquet_test.go index 3d619d6..6900f72 100644 --- a/bowparquet_test.go +++ b/bowparquet_test.go @@ -16,40 +16,55 @@ const ( func TestParquet(t *testing.T) { t.Run("read/write input file", func(t *testing.T) { - bBefore, err := NewBowFromParquet(testInputFileName, true) + bBefore, err := NewBowFromParquet(testInputFileName, false) assert.NoError(t, err) - fmt.Printf("bBefore\n%s\n", bBefore) - assert.NoError(t, bBefore.WriteParquet(testOutputFileName, true)) + assert.NoError(t, bBefore.WriteParquet(testOutputFileName, false)) - bAfter, err := NewBowFromParquet(testOutputFileName+".parquet", true) + bAfter, err := NewBowFromParquet(testOutputFileName+".parquet", false) assert.NoError(t, err) - fmt.Printf("bAfter\n%s\n", bAfter) assert.Equal(t, bBefore.String(), bAfter.String()) require.NoError(t, os.Remove(testOutputFileName+".parquet")) }) - t.Run("bow supported types with rows and nils", func(t *testing.T) { + t.Run("all supported types with rows and nil values", func(t *testing.T) { bBefore, err := NewBowFromRowBasedInterfaces( - []string{"int", "float", "bool", "string"}, - []Type{Int64, Float64, Bool, String}, + []string{"int", "float", "bool", "string", + "timestamp_ms_int", "timestamp_ms_str", + "timestamp_us_int", "timestamp_us_str", + "timestamp_ns_int", "timestamp_ns_str"}, + []Type{Int64, Float64, Bool, String, + TimestampMilli, TimestampMilli, + TimestampMicro, TimestampMicro, + TimestampNano, TimestampNano}, [][]interface{}{ - {1, 1., true, "hi"}, - {2, 2., false, "ho"}, - {nil, nil, nil, nil}, - {3, 3., true, "hu"}, + {1, 1., true, "hi", + 1651017600000, "2022-04-27T00:00:00.123Z", + 1651017600000000, "2022-04-27T00:00:00.123456Z", + 1651017600000000000, "2022-04-27T00:00:00.123456789Z"}, + {2, 2., false, "ho", + 1651021200000, "2022-04-27T01:00:00.123Z", + 1651021200000000, "2022-04-27T01:00:00.123456Z", + 1651021200000000000, "2022-04-27T01:00:00.123456789Z"}, + {nil, nil, nil, nil, nil, nil, nil, nil, nil, nil}, + {3, 3., true, "hu", + 1651028400000, "2022-04-27T03:00:00.123Z", + 1651028400000000, "2022-04-27T03:00:00.123Z456", + 1651028400000000000, "2022-04-27T03:00:00.123456789Z"}, }) require.NoError(t, err) + fmt.Printf("bBefore\n%s\n", bBefore) + assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_withrows", true)) bAfter, err := NewBowFromParquet(testOutputFileName+"_withrows.parquet", true) assert.NoError(t, err) - fmt.Printf("bBefore\n%s\n", bBefore) fmt.Printf("bAfter\n%s\n", bAfter) + assert.Equal(t, bBefore.String(), bAfter.String()) require.NoError(t, os.Remove(testOutputFileName+"_withrows.parquet")) @@ -62,9 +77,9 @@ func TestParquet(t *testing.T) { [][]interface{}{}) require.NoError(t, err) - assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_norows", true)) + assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_norows", false)) - bAfter, err := NewBowFromParquet(testOutputFileName+"_norows.parquet", true) + bAfter, err := NewBowFromParquet(testOutputFileName+"_norows.parquet", false) assert.NoError(t, err) assert.Equal(t, bBefore.String(), bAfter.String()) @@ -76,7 +91,7 @@ func TestParquet(t *testing.T) { bBefore := NewBowEmpty() assert.Errorf(t, - bBefore.WriteParquet(testOutputFileName+"_empty", true), + bBefore.WriteParquet(testOutputFileName+"_empty", false), "bow.WriteParquet: no columns", ) }) diff --git a/bowrecord.go b/bowrecord.go index fa6a59a..510721c 100644 --- a/bowrecord.go +++ b/bowrecord.go @@ -11,10 +11,10 @@ import ( func NewBowFromRecord(record arrow.Record) (Bow, error) { for _, f := range record.Schema().Fields() { if getBowTypeFromArrowFingerprint(f.Type.Fingerprint()) == Unknown { - return nil, fmt.Errorf("unsupported type: ID: %s Name: %s Fingerprint: %s String: %s", - f.Type.ID(), f.Type.Name(), f.Type.Fingerprint(), f.Type) + return nil, fmt.Errorf("unsupported type '%s'", f.Type) } } + return &bow{Record: record}, nil } diff --git a/bowseries.go b/bowseries.go index 977255c..78a2e30 100644 --- a/bowseries.go +++ b/bowseries.go @@ -19,16 +19,19 @@ type Series struct { func NewSeries(name string, typ Type, dataArray interface{}, validityArray interface{}) Series { switch typ { case Int64: - return newInt64Series(name, dataArray.([]int64), buildNullBitmapBytes(len(dataArray.([]int64)), validityArray)) + return newInt64Series(name, dataArray.([]int64), + buildNullBitmapBytes(len(dataArray.([]int64)), validityArray)) case Float64: - return newFloat64Series(name, dataArray.([]float64), buildNullBitmapBytes(len(dataArray.([]float64)), validityArray)) + return newFloat64Series(name, dataArray.([]float64), + buildNullBitmapBytes(len(dataArray.([]float64)), validityArray)) case Bool: - return newBooleanSeries(name, dataArray.([]bool), buildNullBitmapBytes(len(dataArray.([]bool)), validityArray)) + return newBooleanSeries(name, dataArray.([]bool), + buildNullBitmapBytes(len(dataArray.([]bool)), validityArray)) case String: - return newStringSeries(name, dataArray.([]string), buildNullBitmapBytes(len(dataArray.([]string)), validityArray)) + return newStringSeries(name, dataArray.([]string), + buildNullBitmapBytes(len(dataArray.([]string)), validityArray)) case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: - return newTimestampSeries(name, typ, - dataArray.([]arrow.Timestamp), + return newTimestampSeries(name, typ, dataArray.([]arrow.Timestamp), buildNullBitmapBytes(len(dataArray.([]arrow.Timestamp)), validityArray)) default: panic(fmt.Errorf("unsupported type '%s'", typ)) @@ -119,7 +122,7 @@ func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series defer builder.Release() builder.Resize(len(cells)) for i := 0; i < len(cells); i++ { - v, ok := ToTimestamp(cells[i]) + v, ok := mapBowTypeToConvertFunc[typ](cells[i]) if !ok { builder.AppendNull() continue diff --git a/bowseries_test.go b/bowseries_test.go index e0d1f75..6b76e25 100644 --- a/bowseries_test.go +++ b/bowseries_test.go @@ -14,7 +14,6 @@ func TestNewSeriesFromInterfaces(t *testing.T) { testcase := []interface{}{typ.Convert(0), nil} res, err := NewBow(NewSeriesFromInterfaces(typ.String(), typ, testcase)) require.NoError(t, err) - fmt.Printf("BOW\n%s\n", res) assert.Equal(t, typ.Convert(0), res.GetValue(0, 0)) assert.Equal(t, nil, res.GetValue(0, 1)) }) diff --git a/bowsort.go b/bowsort.go index 5fed9f4..f026742 100644 --- a/bowsort.go +++ b/bowsort.go @@ -6,7 +6,6 @@ import ( ) // SortByCol returns a new Bow with the rows sorted by a column in ascending order. -// The only type currently supported for the column to sort by is Int64, without nil values. // Returns the same Bow if the column is already sorted. func (b *bow) SortByCol(colIndex int) (Bow, error) { if b.Column(colIndex).NullN() != 0 { diff --git a/bowsort_test.go b/bowsort_test.go index 29376cf..4d56f34 100644 --- a/bowsort_test.go +++ b/bowsort_test.go @@ -52,6 +52,30 @@ func TestBow_SortByCol(t *testing.T) { assert.EqualValues(t, expected.String(), sorted.String()) }) + t.Run("unsorted with timestamps", func(t *testing.T) { + b, err := NewBowFromRowBasedInterfaces( + []string{"timestamp_ms", "a"}, + []Type{TimestampMilli, Int64}, + [][]interface{}{ + {"2022-01-01T00:00:00Z", 1}, + {"2022-01-03T00:00:00Z", 3}, + {"2022-01-02T00:00:00Z", 2}, + }) + require.NoError(t, err) + expected, err := NewBowFromRowBasedInterfaces( + []string{"timestamp_ms", "a"}, + []Type{TimestampMilli, Int64}, + [][]interface{}{ + {"2022-01-01T00:00:00Z", 1}, + {"2022-01-02T00:00:00Z", 2}, + {"2022-01-03T00:00:00Z", 3}, + }) + require.NoError(t, err) + sorted, err := b.SortByCol(0) + assert.NoError(t, err) + assert.EqualValues(t, expected.String(), sorted.String()) + }) + t.Run("unsorted with different cols", func(t *testing.T) { b, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "time"}, diff --git a/bowstring.go b/bowstring.go index 599839d..f6cadc1 100644 --- a/bowstring.go +++ b/bowstring.go @@ -33,17 +33,21 @@ func (b *bow) String() string { for row := range b.GetRowsChan() { cells = []string{} for colIndex := 0; colIndex < b.NumCols(); colIndex++ { - ti, _ := row[b.Schema().Field(colIndex).Name].(arrow.Timestamp) - switch b.ColumnType(colIndex) { - case TimestampSec: - cells = append(cells, ti.ToTime(arrow.Second).Format(time.RFC3339Nano)) - case TimestampMilli: - cells = append(cells, ti.ToTime(arrow.Millisecond).Format(time.RFC3339Nano)) - case TimestampMicro: - cells = append(cells, ti.ToTime(arrow.Microsecond).Format(time.RFC3339Nano)) - case TimestampNano: - cells = append(cells, ti.ToTime(arrow.Nanosecond).Format(time.RFC3339Nano)) - default: + ti, ok := row[b.Schema().Field(colIndex).Name].(arrow.Timestamp) + if ok { + switch b.ColumnType(colIndex) { + case TimestampSec: + cells = append(cells, ti.ToTime(arrow.Second).Format(time.RFC3339Nano)) + case TimestampMilli: + cells = append(cells, ti.ToTime(arrow.Millisecond).Format(time.RFC3339Nano)) + case TimestampMicro: + cells = append(cells, ti.ToTime(arrow.Microsecond).Format(time.RFC3339Nano)) + case TimestampNano: + cells = append(cells, ti.ToTime(arrow.Nanosecond).Format(time.RFC3339Nano)) + default: + panic("") + } + } else { cells = append(cells, fmt.Sprintf("%v", row[b.Schema().Field(colIndex).Name])) } } @@ -52,9 +56,11 @@ func (b *bow) String() string { } } - _, err = fmt.Fprintf(w, "metadata: %+v\n", b.Metadata()) - if err != nil { - panic(err) + if b.Metadata().Len() > 0 { + _, err = fmt.Fprintf(w, "metadata: %+v\n", b.Metadata()) + if err != nil { + panic(err) + } } if err = w.Flush(); err != nil { diff --git a/bowtypes.go b/bowtypes.go index e682c4d..670a93e 100644 --- a/bowtypes.go +++ b/bowtypes.go @@ -49,25 +49,11 @@ var ( TimestampMicro: arrow.FixedWidthTypes.Timestamp_us, TimestampNano: arrow.FixedWidthTypes.Timestamp_ns, } - mapArrowFingerprintToBowTypes = map[string]Type{ - arrow.PrimitiveTypes.Float64.Fingerprint(): Float64, - arrow.PrimitiveTypes.Int64.Fingerprint(): Int64, - arrow.FixedWidthTypes.Boolean.Fingerprint(): Bool, - arrow.BinaryTypes.String.Fingerprint(): String, - arrow.FixedWidthTypes.Timestamp_s.Fingerprint(): TimestampSec, - arrow.FixedWidthTypes.Timestamp_ms.Fingerprint(): TimestampMilli, - arrow.FixedWidthTypes.Timestamp_us.Fingerprint(): TimestampMicro, - arrow.FixedWidthTypes.Timestamp_ns.Fingerprint(): TimestampNano, - } - mapArrowNameToBowTypes = map[string]Type{ - arrow.PrimitiveTypes.Float64.Name(): Float64, - arrow.PrimitiveTypes.Int64.Name(): Int64, - arrow.FixedWidthTypes.Boolean.Name(): Bool, - arrow.BinaryTypes.String.Name(): String, - arrow.FixedWidthTypes.Timestamp_s.Name(): TimestampSec, - arrow.FixedWidthTypes.Timestamp_ms.Name(): TimestampMilli, - arrow.FixedWidthTypes.Timestamp_us.Name(): TimestampMicro, - arrow.FixedWidthTypes.Timestamp_ns.Name(): TimestampNano, + mapBowTypeToConvertFunc = map[Type]func(i interface{}) (arrow.Timestamp, bool){ + TimestampSec: ToTimestampSec, + TimestampMilli: ToTimestampMilli, + TimestampMicro: ToTimestampMicro, + TimestampNano: ToTimestampNano, } allType = func() []Type { res := make([]Type, InputDependent-1) @@ -90,8 +76,14 @@ func (t Type) Convert(i interface{}) interface{} { val, ok = ToBool(i) case String: val, ok = ToString(i) - case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: - val, ok = ToTimestamp(i) + case TimestampSec: + val, ok = ToTimestampSec(i) + case TimestampMilli: + val, ok = ToTimestampMilli(i) + case TimestampMicro: + val, ok = ToTimestampMicro(i) + case TimestampNano: + val, ok = ToTimestampNano(i) } if ok { return val @@ -115,19 +107,21 @@ func (t Type) String() string { } func getBowTypeFromArrowFingerprint(fingerprint string) Type { - typ, ok := mapArrowFingerprintToBowTypes[fingerprint] - if !ok { - return Unknown + for bowType, arrowType := range mapBowToArrowDataTypes { + if arrowType.Fingerprint() == fingerprint { + return bowType + } } - return typ + return Unknown } func getBowTypeFromArrowName(name string) Type { - typ, ok := mapArrowNameToBowTypes[name] - if !ok { - return Unknown + for bowType, arrowType := range mapBowToArrowDataTypes { + if arrowType.Name() == name { + return bowType + } } - return typ + return Unknown } func GetAllTypes() []Type { diff --git a/bowvalues.go b/bowvalues.go index 33d2e8a..be2986c 100644 --- a/bowvalues.go +++ b/bowvalues.go @@ -5,15 +5,15 @@ import ( "github.com/apache/arrow/go/v8/arrow/array" ) -func Int64Values(arr *array.Int64) []int64 { +func int64Values(arr *array.Int64) []int64 { return arr.Int64Values() } -func Float64Values(arr *array.Float64) []float64 { +func float64Values(arr *array.Float64) []float64 { return arr.Float64Values() } -func BooleanValues(arr *array.Boolean) []bool { +func booleanValues(arr *array.Boolean) []bool { var res = make([]bool, arr.Len()) for i := range res { res[i] = arr.Value(i) @@ -21,7 +21,7 @@ func BooleanValues(arr *array.Boolean) []bool { return res } -func StringValues(arr *array.String) []string { +func stringValues(arr *array.String) []string { var res = make([]string, arr.Len()) for i := range res { res[i] = arr.Value(i) @@ -29,7 +29,7 @@ func StringValues(arr *array.String) []string { return res } -func TimestampValues(arr *array.Timestamp) []arrow.Timestamp { +func timestampValues(arr *array.Timestamp) []arrow.Timestamp { var res = make([]arrow.Timestamp, arr.Len()) for i := range res { res[i] = arr.Value(i) From 4ea1ec89703207c27d6ae667b0490770cf4e58fd Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 28 Apr 2022 11:31:30 +0200 Subject: [PATCH 07/29] clean up --- bowassertion.go | 1 - 1 file changed, 1 deletion(-) diff --git a/bowassertion.go b/bowassertion.go index a3c39eb..2bcc297 100644 --- a/bowassertion.go +++ b/bowassertion.go @@ -12,7 +12,6 @@ const ( // IsColSorted returns a boolean whether the column colIndex is sorted or not, skipping nil values. // An empty column or an unsupported data type returns false. -// Supports only Int64 and Float64. func (b *bow) IsColSorted(colIndex int) bool { if b.IsColEmpty(colIndex) { return false From 4335274d1c17ed68d7290ff9fe6576e68926cb7d Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 28 Apr 2022 11:51:03 +0200 Subject: [PATCH 08/29] first --- Makefile | 3 - bowappend.gen.go.tmpl | 64 ----- bowappend.gen.go => bowappend.go | 2 - bowbuffer.gen.go | 200 ---------------- bowbuffer.gen.go.tmpl | 143 ----------- bowbuffer.go | 173 ++++++++++++++ bowjoin.gen.go | 397 ------------------------------- bowjoin.gen.go.tmpl | 184 -------------- bowjoin.go | 390 ++++++++++++++++++++++++++++++ bowseries.gen.go | 162 ------------- bowseries.gen.go.tmpl | 96 -------- bowseries.gen_test.go | 20 -- bowseries.go | 153 ++++++++++++ bowseries_test.go | 15 ++ datatypes.yml | 16 -- doc.go | 6 - 16 files changed, 731 insertions(+), 1293 deletions(-) delete mode 100644 bowappend.gen.go.tmpl rename bowappend.gen.go => bowappend.go (98%) delete mode 100644 bowbuffer.gen.go delete mode 100644 bowbuffer.gen.go.tmpl delete mode 100644 bowjoin.gen.go delete mode 100644 bowjoin.gen.go.tmpl delete mode 100644 bowseries.gen.go delete mode 100644 bowseries.gen.go.tmpl delete mode 100644 bowseries.gen_test.go delete mode 100644 datatypes.yml delete mode 100644 doc.go diff --git a/Makefile b/Makefile index 627d615..7211e58 100644 --- a/Makefile +++ b/Makefile @@ -7,9 +7,6 @@ install: @go install github.com/Metronlab/genius@latest curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sudo sh -s -- -b $(go env GOPATH)/bin v1.45.2 -gen: - @go generate $(PKG) - lint: golangci-lint run -E gofmt,gci --fix -v $(PKG) diff --git a/bowappend.gen.go.tmpl b/bowappend.gen.go.tmpl deleted file mode 100644 index 2afbdbf..0000000 --- a/bowappend.gen.go.tmpl +++ /dev/null @@ -1,64 +0,0 @@ -package bow - -import ( - "fmt" - - "github.com/apache/arrow/go/v8/arrow" - "github.com/apache/arrow/go/v8/arrow/array" - "github.com/apache/arrow/go/v8/arrow/memory" -) - -// AppendBows attempts to append bows with equal schemas. -// Different schemas will lead to undefined behavior. -// Resulting metadata is copied from the first bow. -func AppendBows(bows ...Bow) (Bow, error) { - if len(bows) == 0 { - return nil, nil - } - - if len(bows) == 1 { - return bows[0], nil - } - - numRows := 0 - for _, b := range bows { - numRows += b.NumRows() - } - - refBow := bows[0] - series := make([]Series, refBow.NumCols()) - - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - for colIndex := 0; colIndex < refBow.NumCols(); colIndex++ { - var newArray arrow.Array - refType := refBow.ColumnType(colIndex) - switch refType { - {{range .Data.types -}} - case {{ .Type }}: - builder := array.New{{ .Type }}Builder(mem) - builder.Resize(numRows) - for _, b := range bows { - if colType := b.ColumnType(colIndex); colType != refType { - return nil, fmt.Errorf( - "bow.AppendBows: incompatible types %v and %v", refType, colType) - } - data := b.(*bow).Column(colIndex).Data() - arr := array.New{{ .Type }}Data(data) - v := {{ .Type }}Values(arr) - valid := getValiditySlice(arr) - builder.AppendValues(v, valid) - } - newArray = builder.NewArray() - {{end -}} - default: - return nil, fmt.Errorf("unsupported type %v", refType) - } - - series[colIndex] = Series{ - Name: refBow.ColumnName(colIndex), - Array: newArray, - } - } - - return NewBowWithMetadata(refBow.Metadata(), series...) -} diff --git a/bowappend.gen.go b/bowappend.go similarity index 98% rename from bowappend.gen.go rename to bowappend.go index ef3188b..a638c59 100644 --- a/bowappend.gen.go +++ b/bowappend.go @@ -1,5 +1,3 @@ -// Code generated by bowappend.gen.go.tmpl. DO NOT EDIT. - package bow import ( diff --git a/bowbuffer.gen.go b/bowbuffer.gen.go deleted file mode 100644 index fdf5b2d..0000000 --- a/bowbuffer.gen.go +++ /dev/null @@ -1,200 +0,0 @@ -// Code generated by bowbuffer.gen.go.tmpl. DO NOT EDIT. - -package bow - -import ( - "fmt" - - "github.com/apache/arrow/go/v8/arrow/array" - "github.com/apache/arrow/go/v8/arrow/bitutil" -) - -// NewBuffer returns a new Buffer of size `size` and Type `typ`. -func NewBuffer(size int, typ Type) Buffer { - switch typ { - case Int64: - return Buffer{ - Data: make([]int64, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } - case Float64: - return Buffer{ - Data: make([]float64, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } - case Boolean: - return Buffer{ - Data: make([]bool, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } - case String: - return Buffer{ - Data: make([]string, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } - default: - panic(fmt.Errorf("unsupported type %s", typ)) - } -} - -// NewBufferFromData returns from `data`, which has to be a slice of a supported type. -func NewBufferFromData(data interface{}) Buffer { - var l int - switch data.(type) { - case []int64: - case []float64: - case []bool: - case []string: - default: - panic(fmt.Errorf("unhandled type %T", data)) - } - return Buffer{ - Data: data, - nullBitmapBytes: buildNullBitmapBytes(l, nil), - } -} - -// Len returns the length of the Buffer -func (b Buffer) Len() int { - switch data := b.Data.(type) { - case []int64: - return len(data) - case []float64: - return len(data) - case []bool: - return len(data) - case []string: - return len(data) - default: - panic(fmt.Errorf("unsupported type '%T'", b.Data)) - } -} - -// SetOrDrop sets the value `value` at index `i` by attempting a type conversion to the Buffer Type. -// Set the bit in the Buffer nullBitmapBytes if the conversion succeeded, or clear it otherwise. -func (b *Buffer) SetOrDrop(i int, value interface{}) { - var valid bool - switch v := b.Data.(type) { - case []int64: - v[i], valid = Int64.Convert(value).(int64) - case []float64: - v[i], valid = Float64.Convert(value).(float64) - case []bool: - v[i], valid = Boolean.Convert(value).(bool) - case []string: - v[i], valid = String.Convert(value).(string) - default: - panic(fmt.Errorf("unsupported type %T", v)) - } - - if valid { - bitutil.SetBit(b.nullBitmapBytes, i) - } else { - bitutil.ClearBit(b.nullBitmapBytes, i) - } -} - -// SetOrDrop sets the value `value` at index `i` by attempting a type assertion to the Buffer Type. -// Set the bit in the Buffer nullBitmapBytes if the type assertion succeeded, or clear it otherwise. -func (b *Buffer) SetOrDropStrict(i int, value interface{}) { - var valid bool - switch v := b.Data.(type) { - case []int64: - v[i], valid = value.(int64) - case []float64: - v[i], valid = value.(float64) - case []bool: - v[i], valid = value.(bool) - case []string: - v[i], valid = value.(string) - default: - panic(fmt.Errorf("unsupported type %T", v)) - } - - if valid { - bitutil.SetBit(b.nullBitmapBytes, i) - } else { - bitutil.ClearBit(b.nullBitmapBytes, i) - } -} - -// GetValue gets the value at index `i` from the Buffer -func (b *Buffer) GetValue(i int) interface{} { - if bitutil.BitIsNotSet(b.nullBitmapBytes, i) { - return nil - } - switch v := b.Data.(type) { - case []int64: - return v[i] - case []float64: - return v[i] - case []bool: - return v[i] - case []string: - return v[i] - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -// Less returns whether the value at index `i` is less that the value at index `j`. -func (b Buffer) Less(i, j int) bool { - switch v := b.Data.(type) { - case []int64: - return v[i] < v[j] - case []float64: - return v[i] < v[j] - case []string: - return v[i] < v[j] - case []bool: - return !v[i] && v[j] - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -// NewBufferFromCol returns a new Buffer created from the column at index `colIndex`. -func (b *bow) NewBufferFromCol(colIndex int) Buffer { - data := b.Column(colIndex).Data() - switch b.ColumnType(colIndex) { - case Int64: - arr := array.NewInt64Data(data) - nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] - nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) - copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: Int64Values(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } - case Float64: - arr := array.NewFloat64Data(data) - nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] - nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) - copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: Float64Values(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } - case Boolean: - arr := array.NewBooleanData(data) - nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] - nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) - copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: BooleanValues(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } - case String: - arr := array.NewStringData(data) - nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] - nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) - copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: StringValues(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } - default: - panic(fmt.Errorf( - "unsupported type %+v", b.ColumnType(colIndex))) - } -} diff --git a/bowbuffer.gen.go.tmpl b/bowbuffer.gen.go.tmpl deleted file mode 100644 index 4e4a7f1..0000000 --- a/bowbuffer.gen.go.tmpl +++ /dev/null @@ -1,143 +0,0 @@ -package bow - -import ( - "fmt" - - "github.com/apache/arrow/go/v8/arrow/array" - "github.com/apache/arrow/go/v8/arrow/bitutil" -) - -// NewBuffer returns a new Buffer of size `size` and Type `typ`. -func NewBuffer(size int, typ Type) Buffer { - switch typ { - {{range .Data.types -}} - case {{ .Type }}: - return Buffer{ - Data: make([]{{ .type }}, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size) / 8), - } - {{end -}} - default: - panic(fmt.Errorf("unsupported type %s", typ)) - } -} - -// NewBufferFromData returns from `data`, which has to be a slice of a supported type. -func NewBufferFromData(data interface{}) Buffer { - var l int - switch data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - {{end -}} - default: - panic(fmt.Errorf("unhandled type %T", data)) - } - return Buffer{ - Data: data, - nullBitmapBytes: buildNullBitmapBytes(l, nil), - } -} - -// Len returns the length of the Buffer -func (b Buffer) Len() int { - switch data := b.Data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - return len(data) - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%T'", b.Data)) - } -} - -// SetOrDrop sets the value `value` at index `i` by attempting a type conversion to the Buffer Type. -// Set the bit in the Buffer nullBitmapBytes if the conversion succeeded, or clear it otherwise. -func (b *Buffer) SetOrDrop(i int, value interface{}) { - var valid bool - switch v := b.Data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - v[i], valid = {{ .Type }}.Convert(value).({{ .type }}) - {{end -}} - default: - panic(fmt.Errorf("unsupported type %T", v)) - } - - if valid { - bitutil.SetBit(b.nullBitmapBytes, i) - } else { - bitutil.ClearBit(b.nullBitmapBytes, i) - } -} - -// SetOrDrop sets the value `value` at index `i` by attempting a type assertion to the Buffer Type. -// Set the bit in the Buffer nullBitmapBytes if the type assertion succeeded, or clear it otherwise. -func (b *Buffer) SetOrDropStrict(i int, value interface{}) { - var valid bool - switch v := b.Data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - v[i], valid = value.({{ .type }}) - {{end -}} - default: - panic(fmt.Errorf("unsupported type %T", v)) - } - - if valid { - bitutil.SetBit(b.nullBitmapBytes, i) - } else { - bitutil.ClearBit(b.nullBitmapBytes, i) - } -} - -// GetValue gets the value at index `i` from the Buffer -func (b *Buffer) GetValue(i int) interface{} { - if bitutil.BitIsNotSet(b.nullBitmapBytes, i) { - return nil - } - switch v := b.Data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - return v[i] - {{end -}} - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -// Less returns whether the value at index `i` is less that the value at index `j`. -func (b Buffer) Less(i, j int) bool { - switch v := b.Data.(type) { - {{range .Data.types -}} - {{if .comparable -}} - case []{{ .type }}: - return v[i] < v[j] - {{end -}} - {{end -}} - case []bool: - return !v[i] && v[j] - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -// NewBufferFromCol returns a new Buffer created from the column at index `colIndex`. -func (b *bow) NewBufferFromCol(colIndex int) Buffer { - data := b.Column(colIndex).Data() - switch b.ColumnType(colIndex) { - {{range .Data.types -}} - case {{ .Type }}: - arr := array.New{{ .Type }}Data(data) - nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] - nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) - copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: {{ .Type }}Values(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } - {{end -}} - default: - panic(fmt.Errorf( - "unsupported type %+v", b.ColumnType(colIndex))) - } -} diff --git a/bowbuffer.go b/bowbuffer.go index b708f99..2ac4eb5 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -4,6 +4,7 @@ import ( "fmt" "sort" + "github.com/apache/arrow/go/v8/arrow/array" "github.com/apache/arrow/go/v8/arrow/bitutil" ) @@ -15,6 +16,178 @@ type Buffer struct { nullBitmapBytes []byte } +// NewBuffer returns a new Buffer of size `size` and Type `typ`. +func NewBuffer(size int, typ Type) Buffer { + switch typ { + case Int64: + return Buffer{ + Data: make([]int64, size), + nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), + } + case Float64: + return Buffer{ + Data: make([]float64, size), + nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), + } + case Boolean: + return Buffer{ + Data: make([]bool, size), + nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), + } + case String: + return Buffer{ + Data: make([]string, size), + nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), + } + default: + panic(fmt.Errorf("unsupported type %s", typ)) + } +} + +// Len returns the length of the Buffer +func (b Buffer) Len() int { + switch data := b.Data.(type) { + case []int64: + return len(data) + case []float64: + return len(data) + case []bool: + return len(data) + case []string: + return len(data) + default: + panic(fmt.Errorf("unsupported type '%T'", b.Data)) + } +} + +// SetOrDrop sets the value `value` at index `i` by attempting a type conversion to the Buffer Type. +// Set the bit in the Buffer nullBitmapBytes if the conversion succeeded, or clear it otherwise. +func (b *Buffer) SetOrDrop(i int, value interface{}) { + var valid bool + switch v := b.Data.(type) { + case []int64: + v[i], valid = Int64.Convert(value).(int64) + case []float64: + v[i], valid = Float64.Convert(value).(float64) + case []bool: + v[i], valid = Boolean.Convert(value).(bool) + case []string: + v[i], valid = String.Convert(value).(string) + default: + panic(fmt.Errorf("unsupported type %T", v)) + } + + if valid { + bitutil.SetBit(b.nullBitmapBytes, i) + } else { + bitutil.ClearBit(b.nullBitmapBytes, i) + } +} + +// SetOrDropStrict sets the value `value` at index `i` by attempting a type assertion to the Buffer Type. +// Set the bit in the Buffer nullBitmapBytes if the type assertion succeeded, or clear it otherwise. +func (b *Buffer) SetOrDropStrict(i int, value interface{}) { + var valid bool + switch v := b.Data.(type) { + case []int64: + v[i], valid = value.(int64) + case []float64: + v[i], valid = value.(float64) + case []bool: + v[i], valid = value.(bool) + case []string: + v[i], valid = value.(string) + default: + panic(fmt.Errorf("unsupported type %T", v)) + } + + if valid { + bitutil.SetBit(b.nullBitmapBytes, i) + } else { + bitutil.ClearBit(b.nullBitmapBytes, i) + } +} + +// GetValue gets the value at index `i` from the Buffer +func (b *Buffer) GetValue(i int) interface{} { + if bitutil.BitIsNotSet(b.nullBitmapBytes, i) { + return nil + } + switch v := b.Data.(type) { + case []int64: + return v[i] + case []float64: + return v[i] + case []bool: + return v[i] + case []string: + return v[i] + default: + panic(fmt.Errorf("unsupported type %T", v)) + } +} + +// Less returns whether the value at index `i` is less that the value at index `j`. +func (b Buffer) Less(i, j int) bool { + switch v := b.Data.(type) { + case []int64: + return v[i] < v[j] + case []float64: + return v[i] < v[j] + case []string: + return v[i] < v[j] + case []bool: + return !v[i] && v[j] + default: + panic(fmt.Errorf("unsupported type %T", v)) + } +} + +// NewBufferFromCol returns a new Buffer created from the column at index `colIndex`. +func (b *bow) NewBufferFromCol(colIndex int) Buffer { + data := b.Column(colIndex).Data() + switch b.ColumnType(colIndex) { + case Int64: + arr := array.NewInt64Data(data) + nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] + nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) + copy(nullBitmapBytesCopy, nullBitmapBytes) + return Buffer{ + Data: Int64Values(arr), + nullBitmapBytes: nullBitmapBytesCopy, + } + case Float64: + arr := array.NewFloat64Data(data) + nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] + nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) + copy(nullBitmapBytesCopy, nullBitmapBytes) + return Buffer{ + Data: Float64Values(arr), + nullBitmapBytes: nullBitmapBytesCopy, + } + case Boolean: + arr := array.NewBooleanData(data) + nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] + nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) + copy(nullBitmapBytesCopy, nullBitmapBytes) + return Buffer{ + Data: BooleanValues(arr), + nullBitmapBytes: nullBitmapBytesCopy, + } + case String: + arr := array.NewStringData(data) + nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] + nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) + copy(nullBitmapBytesCopy, nullBitmapBytes) + return Buffer{ + Data: StringValues(arr), + nullBitmapBytes: nullBitmapBytesCopy, + } + default: + panic(fmt.Errorf( + "unsupported type %+v", b.ColumnType(colIndex))) + } +} func buildNullBitmapBytes(dataLength int, validityArray interface{}) []byte { var res []byte nullBitmapLength := bitutil.CeilByte(dataLength) / 8 diff --git a/bowjoin.gen.go b/bowjoin.gen.go deleted file mode 100644 index 81f82b9..0000000 --- a/bowjoin.gen.go +++ /dev/null @@ -1,397 +0,0 @@ -// Code generated by bowjoin.gen.go.tmpl. DO NOT EDIT. - -package bow - -import ( - "fmt" - - "github.com/apache/arrow/go/v8/arrow/array" -) - -func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, - commonRows struct{ l, r []int }) { - - for colIndex := 0; colIndex < left.NumCols(); colIndex++ { - buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) - switch left.ColumnType(colIndex) { - case Int64: - data := array.NewInt64Data(left.Column(colIndex).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.l[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) - } - } - case Float64: - data := array.NewFloat64Data(left.Column(colIndex).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.l[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) - } - } - case Boolean: - data := array.NewBooleanData(left.Column(colIndex).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.l[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) - } - } - case String: - data := array.NewStringData(left.Column(colIndex).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.l[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) - } - } - default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) - } -} - -func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, newNumCols int, - commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { - var rightCol int - - for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { - buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) - for commonCols[right.ColumnName(rightCol)] != nil { - rightCol++ - } - - // Fill common rows from right bow - switch right.ColumnType(rightCol) { - case Int64: - data := array.NewInt64Data(right.Column(rightCol).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.r[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) - } - } - case Float64: - data := array.NewFloat64Data(right.Column(rightCol).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.r[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) - } - } - case Boolean: - data := array.NewBooleanData(right.Column(rightCol).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.r[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) - } - } - case String: - data := array.NewStringData(right.Column(rightCol).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.r[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) - } - } - default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) - rightCol++ - } -} - -func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uniquesLeft int, - commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { - var leftRow, commonRow int - - for colIndex := 0; colIndex < left.NumCols(); colIndex++ { - leftRow = 0 - commonRow = 0 - buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) - - // Fill rows from left bow - switch left.ColumnType(colIndex) { - case Int64: - data := array.NewInt64Data(left.Column(colIndex).Data()) - for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - if leftRow++; leftRow >= left.NumRows() { - break - } - } - case Float64: - data := array.NewFloat64Data(left.Column(colIndex).Data()) - for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - if leftRow++; leftRow >= left.NumRows() { - break - } - } - case Boolean: - data := array.NewBooleanData(left.Column(colIndex).Data()) - for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - if leftRow++; leftRow >= left.NumRows() { - break - } - } - case String: - data := array.NewStringData(left.Column(colIndex).Data()) - for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - if leftRow++; leftRow >= left.NumRows() { - break - } - } - default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) - } - - // Fill remaining rows from right bow if column is common - _, isColCommon := commonCols[left.ColumnName(colIndex)] - var newRow int - if isColCommon { - newRow = left.NumRows() + len(commonRows.l) - uniquesLeft - } - for rightRow := 0; isColCommon && rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - buf.SetOrDropStrict(newRow, commonCols[left.ColumnName(colIndex)][1].GetValue(rightRow)) - newRow++ - } - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) - } -} - -func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, - newNumRows, uniquesLeft int, commonCols map[string][]Buffer, - commonRows struct{ l, r []int }) { - var leftRow, commonRow, rightCol int - - for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { - leftRow = 0 - commonRow = 0 - for commonCols[right.ColumnName(rightCol)] != nil { - rightCol++ - } - buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) - - switch right.ColumnType(rightCol) { - case Int64: - data := array.NewInt64Data(right.Column(rightCol).Data()) - - // Fill common rows from right bow - for newRow := 0; newRow < newNumRows; newRow++ { - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(commonRows.r[commonRow]) { - buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - leftRow++ - } - - // Fill remaining rows from right bow - newRow := left.NumRows() + len(commonRows.r) - uniquesLeft - for rightRow := 0; rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - if data.IsValid(rightRow) { - buf.SetOrDropStrict(newRow, data.Value(rightRow)) - } - newRow++ - } - } - case Float64: - data := array.NewFloat64Data(right.Column(rightCol).Data()) - - // Fill common rows from right bow - for newRow := 0; newRow < newNumRows; newRow++ { - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(commonRows.r[commonRow]) { - buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - leftRow++ - } - - // Fill remaining rows from right bow - newRow := left.NumRows() + len(commonRows.r) - uniquesLeft - for rightRow := 0; rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - if data.IsValid(rightRow) { - buf.SetOrDropStrict(newRow, data.Value(rightRow)) - } - newRow++ - } - } - case Boolean: - data := array.NewBooleanData(right.Column(rightCol).Data()) - - // Fill common rows from right bow - for newRow := 0; newRow < newNumRows; newRow++ { - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(commonRows.r[commonRow]) { - buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - leftRow++ - } - - // Fill remaining rows from right bow - newRow := left.NumRows() + len(commonRows.r) - uniquesLeft - for rightRow := 0; rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - if data.IsValid(rightRow) { - buf.SetOrDropStrict(newRow, data.Value(rightRow)) - } - newRow++ - } - } - case String: - data := array.NewStringData(right.Column(rightCol).Data()) - - // Fill common rows from right bow - for newRow := 0; newRow < newNumRows; newRow++ { - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(commonRows.r[commonRow]) { - buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - leftRow++ - } - - // Fill remaining rows from right bow - newRow := left.NumRows() + len(commonRows.r) - uniquesLeft - for rightRow := 0; rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - if data.IsValid(rightRow) { - buf.SetOrDropStrict(newRow, data.Value(rightRow)) - } - newRow++ - } - } - default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) - } - (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) - rightCol++ - } -} diff --git a/bowjoin.gen.go.tmpl b/bowjoin.gen.go.tmpl deleted file mode 100644 index fe60161..0000000 --- a/bowjoin.gen.go.tmpl +++ /dev/null @@ -1,184 +0,0 @@ -package bow - -import ( - "fmt" - - "github.com/apache/arrow/go/v8/arrow/array" -) - -func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, - commonRows struct{ l, r []int }) { - - for colIndex := 0; colIndex < left.NumCols(); colIndex++ { - buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) - switch left.ColumnType(colIndex) { - {{range .Data.types -}} - case {{ .Type }}: - data := array.New{{ .Type }}Data(left.Column(colIndex).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.l[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) - } - } - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) - } -} - -func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, newNumCols int, - commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { - var rightCol int - - for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { - buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) - for commonCols[right.ColumnName(rightCol)] != nil { - rightCol++ - } - - // Fill common rows from right bow - switch right.ColumnType(rightCol) { - {{range .Data.types -}} - case {{ .Type }}: - data := array.New{{ .Type }}Data(right.Column(rightCol).Data()) - for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { - if data.IsValid(commonRows.r[rowIndex]) { - buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) - } - } - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) - rightCol++ - } -} - -func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uniquesLeft int, - commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { - var leftRow, commonRow int - - for colIndex := 0; colIndex < left.NumCols(); colIndex++ { - leftRow = 0 - commonRow = 0 - buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) - - // Fill rows from left bow - switch left.ColumnType(colIndex) { - {{range .Data.types -}} - case {{ .Type }}: - data := array.New{{ .Type }}Data(left.Column(colIndex).Data()) - for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(leftRow) { - buf.SetOrDropStrict(newRow, data.Value(leftRow)) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - if leftRow++; leftRow >= left.NumRows() { - break - } - } - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) - } - - // Fill remaining rows from right bow if column is common - _, isColCommon := commonCols[left.ColumnName(colIndex)] - var newRow int - if isColCommon { - newRow = left.NumRows() + len(commonRows.l) - uniquesLeft - } - for rightRow := 0; isColCommon && rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - buf.SetOrDropStrict(newRow, commonCols[left.ColumnName(colIndex)][1].GetValue(rightRow)) - newRow++ - } - } - - (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) - } -} - -func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, - newNumRows, uniquesLeft int, commonCols map[string][]Buffer, - commonRows struct{ l, r []int }) { - var leftRow, commonRow, rightCol int - - for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { - leftRow = 0 - commonRow = 0 - for commonCols[right.ColumnName(rightCol)] != nil { - rightCol++ - } - buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) - - switch right.ColumnType(rightCol) { - {{range .Data.types -}} - case {{ .Type }}: - data := array.New{{ .Type }}Data(right.Column(rightCol).Data()) - - // Fill common rows from right bow - for newRow := 0; newRow < newNumRows; newRow++ { - for commonRow < len(commonRows.l) && - leftRow == commonRows.l[commonRow] && - newRow < newNumRows { - if data.IsValid(commonRows.r[commonRow]) { - buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) - } - if commonRow+1 < len(commonRows.l) && - commonRows.l[commonRow+1] == leftRow { - newRow++ - } - commonRow++ - } - leftRow++ - } - - // Fill remaining rows from right bow - newRow := left.NumRows() + len(commonRows.r) - uniquesLeft - for rightRow := 0; rightRow < right.NumRows(); rightRow++ { - var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { - isRowCommon = true - break - } - } - if !isRowCommon { - if data.IsValid(rightRow) { - buf.SetOrDropStrict(newRow, data.Value(rightRow)) - } - newRow++ - } - } - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) - } - (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) - rightCol++ - } -} diff --git a/bowjoin.go b/bowjoin.go index 0cdd581..73698f0 100644 --- a/bowjoin.go +++ b/bowjoin.go @@ -3,6 +3,8 @@ package bow import ( "fmt" "sort" + + "github.com/apache/arrow/go/v8/arrow/array" ) // InnerJoin joins columns of two Bows on common columns and rows. @@ -182,3 +184,391 @@ func getCommonRows(left, right Bow, commonColBufs map[string][]Buffer) CommonRow return commonRows } + +func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, + commonRows struct{ l, r []int }) { + + for colIndex := 0; colIndex < left.NumCols(); colIndex++ { + buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) + switch left.ColumnType(colIndex) { + case Int64: + data := array.NewInt64Data(left.Column(colIndex).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.l[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) + } + } + case Float64: + data := array.NewFloat64Data(left.Column(colIndex).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.l[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) + } + } + case Boolean: + data := array.NewBooleanData(left.Column(colIndex).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.l[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) + } + } + case String: + data := array.NewStringData(left.Column(colIndex).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.l[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) + } + } + default: + panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + } + + (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) + } +} + +func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, newNumCols int, + commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { + var rightCol int + + for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { + buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) + for commonCols[right.ColumnName(rightCol)] != nil { + rightCol++ + } + + // Fill common rows from right bow + switch right.ColumnType(rightCol) { + case Int64: + data := array.NewInt64Data(right.Column(rightCol).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.r[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) + } + } + case Float64: + data := array.NewFloat64Data(right.Column(rightCol).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.r[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) + } + } + case Boolean: + data := array.NewBooleanData(right.Column(rightCol).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.r[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) + } + } + case String: + data := array.NewStringData(right.Column(rightCol).Data()) + for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { + if data.IsValid(commonRows.r[rowIndex]) { + buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) + } + } + default: + panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + } + + (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) + rightCol++ + } +} + +func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uniquesLeft int, + commonCols map[string][]Buffer, commonRows struct{ l, r []int }) { + var leftRow, commonRow int + + for colIndex := 0; colIndex < left.NumCols(); colIndex++ { + leftRow = 0 + commonRow = 0 + buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) + + // Fill rows from left bow + switch left.ColumnType(colIndex) { + case Int64: + data := array.NewInt64Data(left.Column(colIndex).Data()) + for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + if leftRow++; leftRow >= left.NumRows() { + break + } + } + case Float64: + data := array.NewFloat64Data(left.Column(colIndex).Data()) + for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + if leftRow++; leftRow >= left.NumRows() { + break + } + } + case Boolean: + data := array.NewBooleanData(left.Column(colIndex).Data()) + for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + if leftRow++; leftRow >= left.NumRows() { + break + } + } + case String: + data := array.NewStringData(left.Column(colIndex).Data()) + for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(leftRow) { + buf.SetOrDropStrict(newRow, data.Value(leftRow)) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + if leftRow++; leftRow >= left.NumRows() { + break + } + } + default: + panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + } + + // Fill remaining rows from right bow if column is common + _, isColCommon := commonCols[left.ColumnName(colIndex)] + var newRow int + if isColCommon { + newRow = left.NumRows() + len(commonRows.l) - uniquesLeft + } + for rightRow := 0; isColCommon && rightRow < right.NumRows(); rightRow++ { + var isRowCommon bool + for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { + if rightRow == commonRows.r[commonRow] { + isRowCommon = true + break + } + } + if !isRowCommon { + buf.SetOrDropStrict(newRow, commonCols[left.ColumnName(colIndex)][1].GetValue(rightRow)) + newRow++ + } + } + + (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) + } +} + +func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, + newNumRows, uniquesLeft int, commonCols map[string][]Buffer, + commonRows struct{ l, r []int }) { + var leftRow, commonRow, rightCol int + + for colIndex := left.NumCols(); colIndex < newNumCols; colIndex++ { + leftRow = 0 + commonRow = 0 + for commonCols[right.ColumnName(rightCol)] != nil { + rightCol++ + } + buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) + + switch right.ColumnType(rightCol) { + case Int64: + data := array.NewInt64Data(right.Column(rightCol).Data()) + + // Fill common rows from right bow + for newRow := 0; newRow < newNumRows; newRow++ { + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(commonRows.r[commonRow]) { + buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + leftRow++ + } + + // Fill remaining rows from right bow + newRow := left.NumRows() + len(commonRows.r) - uniquesLeft + for rightRow := 0; rightRow < right.NumRows(); rightRow++ { + var isRowCommon bool + for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { + if rightRow == commonRows.r[commonRow] { + isRowCommon = true + break + } + } + if !isRowCommon { + if data.IsValid(rightRow) { + buf.SetOrDropStrict(newRow, data.Value(rightRow)) + } + newRow++ + } + } + case Float64: + data := array.NewFloat64Data(right.Column(rightCol).Data()) + + // Fill common rows from right bow + for newRow := 0; newRow < newNumRows; newRow++ { + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(commonRows.r[commonRow]) { + buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + leftRow++ + } + + // Fill remaining rows from right bow + newRow := left.NumRows() + len(commonRows.r) - uniquesLeft + for rightRow := 0; rightRow < right.NumRows(); rightRow++ { + var isRowCommon bool + for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { + if rightRow == commonRows.r[commonRow] { + isRowCommon = true + break + } + } + if !isRowCommon { + if data.IsValid(rightRow) { + buf.SetOrDropStrict(newRow, data.Value(rightRow)) + } + newRow++ + } + } + case Boolean: + data := array.NewBooleanData(right.Column(rightCol).Data()) + + // Fill common rows from right bow + for newRow := 0; newRow < newNumRows; newRow++ { + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(commonRows.r[commonRow]) { + buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + leftRow++ + } + + // Fill remaining rows from right bow + newRow := left.NumRows() + len(commonRows.r) - uniquesLeft + for rightRow := 0; rightRow < right.NumRows(); rightRow++ { + var isRowCommon bool + for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { + if rightRow == commonRows.r[commonRow] { + isRowCommon = true + break + } + } + if !isRowCommon { + if data.IsValid(rightRow) { + buf.SetOrDropStrict(newRow, data.Value(rightRow)) + } + newRow++ + } + } + case String: + data := array.NewStringData(right.Column(rightCol).Data()) + + // Fill common rows from right bow + for newRow := 0; newRow < newNumRows; newRow++ { + for commonRow < len(commonRows.l) && + leftRow == commonRows.l[commonRow] && + newRow < newNumRows { + if data.IsValid(commonRows.r[commonRow]) { + buf.SetOrDropStrict(newRow, data.Value(commonRows.r[commonRow])) + } + if commonRow+1 < len(commonRows.l) && + commonRows.l[commonRow+1] == leftRow { + newRow++ + } + commonRow++ + } + leftRow++ + } + + // Fill remaining rows from right bow + newRow := left.NumRows() + len(commonRows.r) - uniquesLeft + for rightRow := 0; rightRow < right.NumRows(); rightRow++ { + var isRowCommon bool + for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { + if rightRow == commonRows.r[commonRow] { + isRowCommon = true + break + } + } + if !isRowCommon { + if data.IsValid(rightRow) { + buf.SetOrDropStrict(newRow, data.Value(rightRow)) + } + newRow++ + } + } + default: + panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + } + (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) + rightCol++ + } +} diff --git a/bowseries.gen.go b/bowseries.gen.go deleted file mode 100644 index b724393..0000000 --- a/bowseries.gen.go +++ /dev/null @@ -1,162 +0,0 @@ -// Code generated by bowseries.gen.go.tmpl. DO NOT EDIT. - -package bow - -import ( - "fmt" - "github.com/apache/arrow/go/v8/arrow" - "github.com/apache/arrow/go/v8/arrow/array" - "github.com/apache/arrow/go/v8/arrow/bitutil" - "github.com/apache/arrow/go/v8/arrow/memory" -) - -// NewSeries returns a new Series from: -// - name: string -// - dataArray: slice of the data in any of the Bow supported types -// - validityArray: -// - If nil, the data will be non-nil -// - Can be of type []bool or []byte to represent nil values -func NewSeries(name string, dataArray interface{}, validityArray interface{}) Series { - switch v := dataArray.(type) { - case []int64: - return newInt64Series(name, v, buildNullBitmapBytes(len(v), validityArray)) - case []float64: - return newFloat64Series(name, v, buildNullBitmapBytes(len(v), validityArray)) - case []bool: - return newBooleanSeries(name, v, buildNullBitmapBytes(len(v), validityArray)) - case []string: - return newStringSeries(name, v, buildNullBitmapBytes(len(v), validityArray)) - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -// NewSeriesFromBuffer returns a new Series from a name and a Buffer. -func NewSeriesFromBuffer(name string, buf Buffer) Series { - switch data := buf.Data.(type) { - case []int64: - return newInt64Series(name, data, buf.nullBitmapBytes) - case []float64: - return newFloat64Series(name, data, buf.nullBitmapBytes) - case []bool: - return newBooleanSeries(name, data, buf.nullBitmapBytes) - case []string: - return newStringSeries(name, data, buf.nullBitmapBytes) - default: - panic(fmt.Errorf("unsupported type '%T'", buf.Data)) - } -} - -func newInt64Series(name string, data []int64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewInt64Data( - array.NewData(mapBowToArrowTypes[Int64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), - } -} - -func newFloat64Series(name string, data []float64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewFloat64Data( - array.NewData(mapBowToArrowTypes[Float64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), - } -} - -func newBooleanSeries(name string, data []bool, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewBooleanBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} -} - -func newStringSeries(name string, data []string, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewStringBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} -} - -// NewSeriesFromInterfaces returns a new Series from: -// - name: string -// - typ: Bow Type -// - data: represented by an slice of interface{}, with eventually nil values -func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { - if typ == Unknown { - var err error - if typ, err = getBowTypeFromInterfaces(data); err != nil { - panic(err) - } - } - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - switch typ { - case Int64: - builder := array.NewInt64Builder(mem) - defer builder.Release() - builder.Resize(len(data)) - for i := 0; i < len(data); i++ { - v, ok := ToInt64(data[i]) - if !ok { - builder.AppendNull() - continue - } - builder.Append(v) - } - return Series{Name: name, Array: builder.NewArray()} - case Float64: - builder := array.NewFloat64Builder(mem) - defer builder.Release() - builder.Resize(len(data)) - for i := 0; i < len(data); i++ { - v, ok := ToFloat64(data[i]) - if !ok { - builder.AppendNull() - continue - } - builder.Append(v) - } - return Series{Name: name, Array: builder.NewArray()} - case Boolean: - builder := array.NewBooleanBuilder(mem) - defer builder.Release() - builder.Resize(len(data)) - for i := 0; i < len(data); i++ { - v, ok := ToBoolean(data[i]) - if !ok { - builder.AppendNull() - continue - } - builder.Append(v) - } - return Series{Name: name, Array: builder.NewArray()} - case String: - builder := array.NewStringBuilder(mem) - defer builder.Release() - builder.Resize(len(data)) - for i := 0; i < len(data); i++ { - v, ok := ToString(data[i]) - if !ok { - builder.AppendNull() - continue - } - builder.Append(v) - } - return Series{Name: name, Array: builder.NewArray()} - default: - panic(fmt.Errorf("unhandled type %s", typ)) - } -} diff --git a/bowseries.gen.go.tmpl b/bowseries.gen.go.tmpl deleted file mode 100644 index 19a2a2e..0000000 --- a/bowseries.gen.go.tmpl +++ /dev/null @@ -1,96 +0,0 @@ -package bow - -import ( - "fmt" - "github.com/apache/arrow/go/v8/arrow" - "github.com/apache/arrow/go/v8/arrow/array" - "github.com/apache/arrow/go/v8/arrow/bitutil" - "github.com/apache/arrow/go/v8/arrow/memory" -) - -// NewSeries returns a new Series from: -// - name: string -// - dataArray: slice of the data in any of the Bow supported types -// - validityArray: -// - If nil, the data will be non-nil -// - Can be of type []bool or []byte to represent nil values -func NewSeries(name string, dataArray interface{}, validityArray interface{}) Series { - switch v := dataArray.(type) { - {{range .Data.types -}} - case []{{ .type }}: - return new{{ .Type }}Series(name, v, buildNullBitmapBytes(len(v), validityArray)) - {{end -}} - default: - panic(fmt.Errorf("unsupported type %T", v)) - } -} - -// NewSeriesFromBuffer returns a new Series from a name and a Buffer. -func NewSeriesFromBuffer(name string, buf Buffer) Series { - switch data := buf.Data.(type) { - {{range .Data.types -}} - case []{{ .type }}: - return new{{ .Type }}Series(name, data, buf.nullBitmapBytes) - {{end -}} - default: - panic(fmt.Errorf("unsupported type '%T'", buf.Data)) - } -} - -{{range .Data.types -}} -func new{{ .Type }}Series(name string, data []{{ .type }}, valid []byte) Series { - {{ if .bufferMemoryMappingEqualArrow -}} - length := len(data) - return Series{ - Name: name, - Array: array.New{{ .Type }}Data( - array.NewData(mapBowToArrowTypes[{{ .Type }}], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.{{ .Type }}Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), - } - {{ else -}} - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.New{{ .Type }}Builder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} - {{ end -}} -} - -{{end -}} - -// NewSeriesFromInterfaces returns a new Series from: -// - name: string -// - typ: Bow Type -// - data: represented by an slice of interface{}, with eventually nil values -func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { - if typ == Unknown { - var err error - if typ, err = getBowTypeFromInterfaces(data); err != nil { - panic(err) - } - } - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - switch typ { - {{range .Data.types -}} - case {{ .Type }}: - builder := array.New{{ .Type }}Builder(mem) - defer builder.Release() - builder.Resize(len(data)) - for i := 0; i < len(data); i++ { - v, ok := To{{ .Type }}(data[i]) - if !ok { - builder.AppendNull() - continue - } - builder.Append(v) - } - return Series{Name: name, Array: builder.NewArray()} - {{end -}} - default: - panic(fmt.Errorf("unhandled type %s", typ)) - } -} \ No newline at end of file diff --git a/bowseries.gen_test.go b/bowseries.gen_test.go deleted file mode 100644 index b7af97b..0000000 --- a/bowseries.gen_test.go +++ /dev/null @@ -1,20 +0,0 @@ -package bow - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestNewSeriesFromColBasedInterfaces(t *testing.T) { - for _, typ := range allType { - t.Run(typ.String(), func(t *testing.T) { - testcase := []interface{}{typ.Convert(0), nil} - res, err := NewBow(NewSeriesFromInterfaces(typ.String(), typ, testcase)) - require.NoError(t, err) - assert.Equal(t, typ.Convert(0), res.GetValue(0, 0)) - assert.Equal(t, nil, res.GetValue(0, 1)) - }) - } -} diff --git a/bowseries.go b/bowseries.go index c1e098c..603de02 100644 --- a/bowseries.go +++ b/bowseries.go @@ -5,7 +5,9 @@ import ( "fmt" "github.com/apache/arrow/go/v8/arrow" + "github.com/apache/arrow/go/v8/arrow/array" "github.com/apache/arrow/go/v8/arrow/bitutil" + "github.com/apache/arrow/go/v8/arrow/memory" ) // Series is wrapping the Apache Arrow arrow.Array interface, with the addition of a name. @@ -15,6 +17,157 @@ type Series struct { Array arrow.Array } +// NewSeries returns a new Series from: +// - name: string +// - dataArray: slice of the data in any of the Bow supported types +// - validityArray: +// - If nil, the data will be non-nil +// - Can be of type []bool or []byte to represent nil values +func NewSeries(name string, dataArray interface{}, validityArray interface{}) Series { + switch v := dataArray.(type) { + case []int64: + return newInt64Series(name, v, buildNullBitmapBytes(len(v), validityArray)) + case []float64: + return newFloat64Series(name, v, buildNullBitmapBytes(len(v), validityArray)) + case []bool: + return newBooleanSeries(name, v, buildNullBitmapBytes(len(v), validityArray)) + case []string: + return newStringSeries(name, v, buildNullBitmapBytes(len(v), validityArray)) + default: + panic(fmt.Errorf("unsupported type %T", v)) + } +} + +// NewSeriesFromBuffer returns a new Series from a name and a Buffer. +func NewSeriesFromBuffer(name string, buf Buffer) Series { + switch data := buf.Data.(type) { + case []int64: + return newInt64Series(name, data, buf.nullBitmapBytes) + case []float64: + return newFloat64Series(name, data, buf.nullBitmapBytes) + case []bool: + return newBooleanSeries(name, data, buf.nullBitmapBytes) + case []string: + return newStringSeries(name, data, buf.nullBitmapBytes) + default: + panic(fmt.Errorf("unsupported type '%T'", buf.Data)) + } +} + +func newInt64Series(name string, data []int64, valid []byte) Series { + length := len(data) + return Series{ + Name: name, + Array: array.NewInt64Data( + array.NewData(mapBowToArrowTypes[Int64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(data)), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), + } +} + +func newFloat64Series(name string, data []float64, valid []byte) Series { + length := len(data) + return Series{ + Name: name, + Array: array.NewFloat64Data( + array.NewData(mapBowToArrowTypes[Float64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(data)), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), + } +} + +func newBooleanSeries(name string, data []bool, valid []byte) Series { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewBooleanBuilder(mem) + defer builder.Release() + builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) + return Series{Name: name, Array: builder.NewArray()} +} + +func newStringSeries(name string, data []string, valid []byte) Series { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewStringBuilder(mem) + defer builder.Release() + builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) + return Series{Name: name, Array: builder.NewArray()} +} + +// NewSeriesFromInterfaces returns a new Series from: +// - name: string +// - typ: Bow Type +// - data: represented by a slice of interface{}, with eventually nil values +func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { + if typ == Unknown { + var err error + if typ, err = getBowTypeFromInterfaces(data); err != nil { + panic(err) + } + } + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + switch typ { + case Int64: + builder := array.NewInt64Builder(mem) + defer builder.Release() + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToInt64(data[i]) + if !ok { + builder.AppendNull() + continue + } + builder.Append(v) + } + return Series{Name: name, Array: builder.NewArray()} + case Float64: + builder := array.NewFloat64Builder(mem) + defer builder.Release() + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToFloat64(data[i]) + if !ok { + builder.AppendNull() + continue + } + builder.Append(v) + } + return Series{Name: name, Array: builder.NewArray()} + case Boolean: + builder := array.NewBooleanBuilder(mem) + defer builder.Release() + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToBoolean(data[i]) + if !ok { + builder.AppendNull() + continue + } + builder.Append(v) + } + return Series{Name: name, Array: builder.NewArray()} + case String: + builder := array.NewStringBuilder(mem) + defer builder.Release() + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToString(data[i]) + if !ok { + builder.AppendNull() + continue + } + builder.Append(v) + } + return Series{Name: name, Array: builder.NewArray()} + default: + panic(fmt.Errorf("unhandled type %s", typ)) + } +} + func buildNullBitmapBool(dataLength int, validityArray interface{}) []bool { switch valid := validityArray.(type) { case nil: diff --git a/bowseries_test.go b/bowseries_test.go index 98eab58..990b119 100644 --- a/bowseries_test.go +++ b/bowseries_test.go @@ -3,8 +3,23 @@ package bow import ( "fmt" "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) +func TestNewSeriesFromColBasedInterfaces(t *testing.T) { + for _, typ := range allType { + t.Run(typ.String(), func(t *testing.T) { + testcase := []interface{}{typ.Convert(0), nil} + res, err := NewBow(NewSeriesFromInterfaces(typ.String(), typ, testcase)) + require.NoError(t, err) + assert.Equal(t, typ.Convert(0), res.GetValue(0, 0)) + assert.Equal(t, nil, res.GetValue(0, 1)) + }) + } +} + func BenchmarkNewSeries(b *testing.B) { for rows := 10; rows <= 100000; rows *= 10 { dataArray := make([]int64, rows) diff --git a/datatypes.yml b/datatypes.yml deleted file mode 100644 index 8bde5b8..0000000 --- a/datatypes.yml +++ /dev/null @@ -1,16 +0,0 @@ -types: - - Type: Int64 - type: int64 - comparable: true - bufferMemoryMappingEqualArrow: true - - Type: Float64 - type: float64 - comparable: true - bufferMemoryMappingEqualArrow: true - - Type: Boolean - type: bool - bufferMemoryMappingEqualArrow: false - - Type: String - type: string - comparable: true - bufferMemoryMappingEqualArrow: false diff --git a/doc.go b/doc.go deleted file mode 100644 index 4cc7dd8..0000000 --- a/doc.go +++ /dev/null @@ -1,6 +0,0 @@ -package bow - -//go:generate genius tmpl -d datatypes.yml -p ./ bowbuffer.gen.go.tmpl -//go:generate genius tmpl -d datatypes.yml -p ./ bowseries.gen.go.tmpl -//go:generate genius tmpl -d datatypes.yml -p ./ bowappend.gen.go.tmpl -//go:generate genius tmpl -d datatypes.yml -p ./ bowjoin.gen.go.tmpl From 33900cbbbdb602cf6e7f5eae94dc3f79af9b71c2 Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 28 Apr 2022 13:57:12 +0200 Subject: [PATCH 09/29] corrections --- XXXexamples_test.go | 3 --- bow.go | 6 +++--- bowappend.gen.go | 14 +++++--------- bowappend.gen.go.tmpl | 5 ++--- bowbuffer.gen.go | 37 +++++++++++++------------------------ bowbuffer.gen.go.tmpl | 22 ++++++++++------------ bowdiff.go | 2 +- bowfill.go | 2 +- bowjoin.gen.go | 28 ++++++++++++++-------------- bowjoin.gen.go.tmpl | 16 ++++++++-------- bowrecord.go | 2 +- bowseries.gen.go | 4 ++-- bowseries.gen.go.tmpl | 4 ++-- bowseries.go | 2 +- bowstring.go | 11 ++++++----- 15 files changed, 69 insertions(+), 89 deletions(-) diff --git a/XXXexamples_test.go b/XXXexamples_test.go index d3e6f57..ebf00ac 100644 --- a/XXXexamples_test.go +++ b/XXXexamples_test.go @@ -23,7 +23,6 @@ func ExampleNewBow() { // 2 // 3 3.3 true // 4 4 false - // metadata: [] } func ExampleNewBowFromColBasedInterfaces() { @@ -47,7 +46,6 @@ func ExampleNewBowFromColBasedInterfaces() { // 1 1 1.1 // 1 2 // 3 3 1.3 - // metadata: [] } func ExampleNewBowFromRowBasedInterfaces() { @@ -70,7 +68,6 @@ func ExampleNewBowFromRowBasedInterfaces() { // 1 1 1.1 // 1 2 // 3 3 1.3 - // metadata: [] } func ExampleBow_MarshalJSON() { diff --git a/bow.go b/bow.go index 40ca030..dfba6fe 100644 --- a/bow.go +++ b/bow.go @@ -95,7 +95,7 @@ type bow struct { arrow.Record } -// NewBowEmpty returns a new empty Bow +// NewBowEmpty returns a new empty Bow. func NewBowEmpty() Bow { var fields []arrow.Field var arrays []arrow.Array @@ -103,7 +103,7 @@ func NewBowEmpty() Bow { return &bow{Record: array.NewRecord(schema, arrays, 0)} } -// NewBow returns a new Bow from one or more Series +// NewBow returns a new Bow from one or more Series. func NewBow(series ...Series) (Bow, error) { rec, err := newRecord(Metadata{}, series...) if err != nil { @@ -173,7 +173,7 @@ func NewBowFromRowBasedInterfaces(colNames []string, colTypes []Type, rowBasedDa return NewBow(series...) } -// NewEmptySlice returns an empty slice of the Bow +// NewEmptySlice returns an empty slice of the Bow. func (b *bow) NewEmptySlice() Bow { return b.NewSlice(0, 0) } diff --git a/bowappend.gen.go b/bowappend.gen.go index ef3188b..252e418 100644 --- a/bowappend.gen.go +++ b/bowappend.gen.go @@ -40,8 +40,7 @@ func AppendBows(bows ...Bow) (Bow, error) { builder.Resize(numRows) for _, b := range bows { if colType := b.ColumnType(colIndex); colType != refType { - return nil, fmt.Errorf( - "bow.AppendBows: incompatible types %v and %v", refType, colType) + return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType) } data := b.(*bow).Column(colIndex).Data() arr := array.NewInt64Data(data) @@ -55,8 +54,7 @@ func AppendBows(bows ...Bow) (Bow, error) { builder.Resize(numRows) for _, b := range bows { if colType := b.ColumnType(colIndex); colType != refType { - return nil, fmt.Errorf( - "bow.AppendBows: incompatible types %v and %v", refType, colType) + return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType) } data := b.(*bow).Column(colIndex).Data() arr := array.NewFloat64Data(data) @@ -70,8 +68,7 @@ func AppendBows(bows ...Bow) (Bow, error) { builder.Resize(numRows) for _, b := range bows { if colType := b.ColumnType(colIndex); colType != refType { - return nil, fmt.Errorf( - "bow.AppendBows: incompatible types %v and %v", refType, colType) + return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType) } data := b.(*bow).Column(colIndex).Data() arr := array.NewBooleanData(data) @@ -85,8 +82,7 @@ func AppendBows(bows ...Bow) (Bow, error) { builder.Resize(numRows) for _, b := range bows { if colType := b.ColumnType(colIndex); colType != refType { - return nil, fmt.Errorf( - "bow.AppendBows: incompatible types %v and %v", refType, colType) + return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType) } data := b.(*bow).Column(colIndex).Data() arr := array.NewStringData(data) @@ -96,7 +92,7 @@ func AppendBows(bows ...Bow) (Bow, error) { } newArray = builder.NewArray() default: - return nil, fmt.Errorf("unsupported type %v", refType) + return nil, fmt.Errorf("unsupported type '%s'", refType) } series[colIndex] = Series{ diff --git a/bowappend.gen.go.tmpl b/bowappend.gen.go.tmpl index 2afbdbf..4f84710 100644 --- a/bowappend.gen.go.tmpl +++ b/bowappend.gen.go.tmpl @@ -39,8 +39,7 @@ func AppendBows(bows ...Bow) (Bow, error) { builder.Resize(numRows) for _, b := range bows { if colType := b.ColumnType(colIndex); colType != refType { - return nil, fmt.Errorf( - "bow.AppendBows: incompatible types %v and %v", refType, colType) + return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType) } data := b.(*bow).Column(colIndex).Data() arr := array.New{{ .Type }}Data(data) @@ -51,7 +50,7 @@ func AppendBows(bows ...Bow) (Bow, error) { newArray = builder.NewArray() {{end -}} default: - return nil, fmt.Errorf("unsupported type %v", refType) + return nil, fmt.Errorf("unsupported type '%s'", refType) } series[colIndex] = Series{ diff --git a/bowbuffer.gen.go b/bowbuffer.gen.go index fdf5b2d..27f766b 100644 --- a/bowbuffer.gen.go +++ b/bowbuffer.gen.go @@ -11,30 +11,20 @@ import ( // NewBuffer returns a new Buffer of size `size` and Type `typ`. func NewBuffer(size int, typ Type) Buffer { + buf := Buffer{nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8)} switch typ { case Int64: - return Buffer{ - Data: make([]int64, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } + buf.Data = make([]int64, size) case Float64: - return Buffer{ - Data: make([]float64, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } + buf.Data = make([]float64, size) case Boolean: - return Buffer{ - Data: make([]bool, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } + buf.Data = make([]bool, size) case String: - return Buffer{ - Data: make([]string, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } + buf.Data = make([]string, size) default: - panic(fmt.Errorf("unsupported type %s", typ)) + panic(fmt.Errorf("unsupported type '%s'", typ)) } + return buf } // NewBufferFromData returns from `data`, which has to be a slice of a supported type. @@ -46,7 +36,7 @@ func NewBufferFromData(data interface{}) Buffer { case []bool: case []string: default: - panic(fmt.Errorf("unhandled type %T", data)) + panic(fmt.Errorf("unsupported type '%T'", data)) } return Buffer{ Data: data, @@ -84,7 +74,7 @@ func (b *Buffer) SetOrDrop(i int, value interface{}) { case []string: v[i], valid = String.Convert(value).(string) default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%T'", v)) } if valid { @@ -108,7 +98,7 @@ func (b *Buffer) SetOrDropStrict(i int, value interface{}) { case []string: v[i], valid = value.(string) default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%T'", v)) } if valid { @@ -133,7 +123,7 @@ func (b *Buffer) GetValue(i int) interface{} { case []string: return v[i] default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%T'", v)) } } @@ -149,7 +139,7 @@ func (b Buffer) Less(i, j int) bool { case []bool: return !v[i] && v[j] default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%T'", v)) } } @@ -194,7 +184,6 @@ func (b *bow) NewBufferFromCol(colIndex int) Buffer { nullBitmapBytes: nullBitmapBytesCopy, } default: - panic(fmt.Errorf( - "unsupported type %+v", b.ColumnType(colIndex))) + panic(fmt.Errorf("unsupported type '%s'", b.ColumnType(colIndex))) } } diff --git a/bowbuffer.gen.go.tmpl b/bowbuffer.gen.go.tmpl index 4e4a7f1..4aaab7e 100644 --- a/bowbuffer.gen.go.tmpl +++ b/bowbuffer.gen.go.tmpl @@ -9,17 +9,16 @@ import ( // NewBuffer returns a new Buffer of size `size` and Type `typ`. func NewBuffer(size int, typ Type) Buffer { + buf := Buffer{nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8)} switch typ { {{range .Data.types -}} case {{ .Type }}: - return Buffer{ - Data: make([]{{ .type }}, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size) / 8), - } + buf.Data = make([]{{ .type }}, size) {{end -}} default: - panic(fmt.Errorf("unsupported type %s", typ)) + panic(fmt.Errorf("unsupported type '%s'", typ)) } + return buf } // NewBufferFromData returns from `data`, which has to be a slice of a supported type. @@ -30,7 +29,7 @@ func NewBufferFromData(data interface{}) Buffer { case []{{ .type }}: {{end -}} default: - panic(fmt.Errorf("unhandled type %T", data)) + panic(fmt.Errorf("unsupported type '%T'", data)) } return Buffer{ Data: data, @@ -60,7 +59,7 @@ func (b *Buffer) SetOrDrop(i int, value interface{}) { v[i], valid = {{ .Type }}.Convert(value).({{ .type }}) {{end -}} default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%T'", v)) } if valid { @@ -80,7 +79,7 @@ func (b *Buffer) SetOrDropStrict(i int, value interface{}) { v[i], valid = value.({{ .type }}) {{end -}} default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%T'", v)) } if valid { @@ -101,7 +100,7 @@ func (b *Buffer) GetValue(i int) interface{} { return v[i] {{end -}} default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%T'", v)) } } @@ -117,7 +116,7 @@ func (b Buffer) Less(i, j int) bool { case []bool: return !v[i] && v[j] default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%T'", v)) } } @@ -137,7 +136,6 @@ func (b *bow) NewBufferFromCol(colIndex int) Buffer { } {{end -}} default: - panic(fmt.Errorf( - "unsupported type %+v", b.ColumnType(colIndex))) + panic(fmt.Errorf("unsupported type '%s'", b.ColumnType(colIndex))) } } diff --git a/bowdiff.go b/bowdiff.go index 6beed6f..9426d81 100644 --- a/bowdiff.go +++ b/bowdiff.go @@ -22,7 +22,7 @@ func (b *bow) Diff(colIndices ...int) (Bow, error) { case Boolean: default: return nil, fmt.Errorf( - "column '%s' is of unsupported type '%v'", + "column '%s' is of unsupported type '%s'", col.Name, b.ColumnType(colIndex)) } } diff --git a/bowfill.go b/bowfill.go index 52906e8..c6d4236 100644 --- a/bowfill.go +++ b/bowfill.go @@ -10,7 +10,7 @@ import ( // FillLinear fills the column toFillColIndex using the Linear interpolation method according // to the reference column refColIndex, which has to be sorted. -// Fills only int64 and float64 types. +// Fills only Int64 and Float64 types. func (b *bow) FillLinear(refColIndex, toFillColIndex int) (Bow, error) { if refColIndex < 0 || refColIndex > b.NumCols()-1 { return nil, fmt.Errorf("refColIndex is out of range") diff --git a/bowjoin.gen.go b/bowjoin.gen.go index 81f82b9..80d76ae 100644 --- a/bowjoin.gen.go +++ b/bowjoin.gen.go @@ -43,7 +43,7 @@ func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, } } default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + panic(fmt.Errorf("unsupported type '%s'", left.ColumnType(colIndex))) } (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) @@ -91,7 +91,7 @@ func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, ne } } default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + panic(fmt.Errorf("unsupported type '%s'", right.ColumnType(rightCol))) } (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) @@ -199,7 +199,7 @@ func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uni } } default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + panic(fmt.Errorf("unsupported type '%s'", left.ColumnType(colIndex))) } // Fill remaining rows from right bow if column is common @@ -210,8 +210,8 @@ func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uni } for rightRow := 0; isColCommon && rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -264,8 +264,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -301,8 +301,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -338,8 +338,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -375,8 +375,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -389,7 +389,7 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, } } default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + panic(fmt.Errorf("unsupported type '%s'", right.ColumnType(rightCol))) } (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) rightCol++ diff --git a/bowjoin.gen.go.tmpl b/bowjoin.gen.go.tmpl index fe60161..8cefacd 100644 --- a/bowjoin.gen.go.tmpl +++ b/bowjoin.gen.go.tmpl @@ -22,7 +22,7 @@ func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, } {{end -}} default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + panic(fmt.Errorf("unsupported type '%s'", left.ColumnType(colIndex))) } (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) @@ -51,7 +51,7 @@ func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, ne } {{end -}} default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + panic(fmt.Errorf("unsupported type '%s'", right.ColumnType(rightCol))) } (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) @@ -95,7 +95,7 @@ func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uni } {{end -}} default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + panic(fmt.Errorf("unsupported type '%s'", left.ColumnType(colIndex))) } // Fill remaining rows from right bow if column is common @@ -106,8 +106,8 @@ func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uni } for rightRow := 0; isColCommon && rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -161,8 +161,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -176,7 +176,7 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, } {{end -}} default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + panic(fmt.Errorf("unsupported type '%s'", right.ColumnType(rightCol))) } (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) rightCol++ diff --git a/bowrecord.go b/bowrecord.go index 6b92104..44e0a1b 100644 --- a/bowrecord.go +++ b/bowrecord.go @@ -25,7 +25,7 @@ func newRecord(metadata Metadata, series ...Series) (arrow.Record, error) { return nil, errors.New("empty Series name") } if getBowTypeFromArrowType(s.Array.DataType()) == Unknown { - return nil, fmt.Errorf("unsupported type: %s", s.Array.DataType().Name()) + return nil, fmt.Errorf("unsupported type '%s'", s.Array.DataType()) } if int64(s.Array.Len()) != nRows { return nil, diff --git a/bowseries.gen.go b/bowseries.gen.go index b724393..dab0b95 100644 --- a/bowseries.gen.go +++ b/bowseries.gen.go @@ -27,7 +27,7 @@ func NewSeries(name string, dataArray interface{}, validityArray interface{}) Se case []string: return newStringSeries(name, v, buildNullBitmapBytes(len(v), validityArray)) default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%T'", v)) } } @@ -157,6 +157,6 @@ func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { } return Series{Name: name, Array: builder.NewArray()} default: - panic(fmt.Errorf("unhandled type %s", typ)) + panic(fmt.Errorf("unsupported type '%s'", typ)) } } diff --git a/bowseries.gen.go.tmpl b/bowseries.gen.go.tmpl index 19a2a2e..c3527c0 100644 --- a/bowseries.gen.go.tmpl +++ b/bowseries.gen.go.tmpl @@ -21,7 +21,7 @@ func NewSeries(name string, dataArray interface{}, validityArray interface{}) Se return new{{ .Type }}Series(name, v, buildNullBitmapBytes(len(v), validityArray)) {{end -}} default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%T'", v)) } } @@ -91,6 +91,6 @@ func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { return Series{Name: name, Array: builder.NewArray()} {{end -}} default: - panic(fmt.Errorf("unhandled type %s", typ)) + panic(fmt.Errorf("unsupported type '%s'", typ)) } } \ No newline at end of file diff --git a/bowseries.go b/bowseries.go index c1e098c..099394c 100644 --- a/bowseries.go +++ b/bowseries.go @@ -36,7 +36,7 @@ func buildNullBitmapBool(dataLength int, validityArray interface{}) []bool { } return res default: - panic(fmt.Errorf("unsupported type %T", valid)) + panic(fmt.Errorf("unsupported type '%T'", valid)) } } diff --git a/bowstring.go b/bowstring.go index 3ce4bba..70ad336 100644 --- a/bowstring.go +++ b/bowstring.go @@ -42,13 +42,14 @@ func (b *bow) String() string { }) } - _, err := fmt.Fprintf(w, "metadata: %+v\n", b.Metadata()) - if err != nil { - panic(err) + if b.Metadata().Len() > 0 { + _, err := fmt.Fprintf(w, "metadata: %+v\n", b.Metadata()) + if err != nil { + panic(err) + } } - // Flush buffer and format lines along the way - if err = w.Flush(); err != nil { + if err := w.Flush(); err != nil { panic(err) } From 6901596a7a0c30176577671376c4a36c369c7e08 Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 28 Apr 2022 14:08:25 +0200 Subject: [PATCH 10/29] revert Boolean renaming --- XXXexamples_test.go | 2 +- benchmarks/generator_test.go | 4 ++-- bow_test.go | 6 +++--- bowappend.go | 2 +- bowbuffer.go | 16 ++++++++-------- bowconvert.go | 4 ++-- bowconvert_test.go | 20 ++++++++++---------- bowdiff.go | 4 ++-- bowdiff_test.go | 8 ++++---- bowfill.go | 2 +- bowfill_test.go | 12 ++++++------ bowfind_test.go | 2 +- bowgenerator.go | 2 +- bowgenerator_test.go | 4 ++-- bowgetters.go | 2 +- bowjoin.go | 8 ++++---- bowjson_test.go | 6 +++--- bowparquet_test.go | 4 ++-- bowseries.go | 10 +++++----- bowsort_test.go | 8 ++++---- bowtypes.go | 8 ++++---- rolling/aggregation.go | 2 +- rolling/aggregation/core_test.go | 2 +- rolling/aggregation/firstlast_test.go | 4 ++-- rolling/aggregation/mode_test.go | 2 +- rolling/aggregation/whole_test.go | 2 +- rolling/interpolation/linear_test.go | 2 +- rolling/interpolation/none.go | 2 +- rolling/interpolation/stepprevious.go | 2 +- rolling/interpolation/stepprevious_test.go | 4 ++-- rolling/interpolation_test.go | 2 +- 31 files changed, 79 insertions(+), 79 deletions(-) diff --git a/XXXexamples_test.go b/XXXexamples_test.go index 02d162c..9ed640a 100644 --- a/XXXexamples_test.go +++ b/XXXexamples_test.go @@ -10,7 +10,7 @@ func ExampleNewBow() { b, err := NewBow( NewSeries("col1", Int64, []int64{1, 2, 3, 4}, nil), NewSeries("col2", Float64, []float64{1.1, 2.2, 3.3, 4}, []bool{true, false, true, true}), - NewSeries("col3", Bool, []bool{true, false, true, false}, []bool{true, false, true, true}), + NewSeries("col3", Boolean, []bool{true, false, true, false}, []bool{true, false, true, true}), ) if err != nil { panic(err) diff --git a/benchmarks/generator_test.go b/benchmarks/generator_test.go index aa199be..a557bcc 100644 --- a/benchmarks/generator_test.go +++ b/benchmarks/generator_test.go @@ -37,7 +37,7 @@ func TestGeneratorForBenchmarks(t *testing.T) { Name: "Boolean_bow1", GenStrategy: bow.GenStrategyRandom, MissingData: true, - Type: bow.Bool, + Type: bow.Boolean, }, bow.GenSeriesOptions{ Name: "String_bow1", @@ -68,7 +68,7 @@ func TestGeneratorForBenchmarks(t *testing.T) { Name: "Boolean_bow2", GenStrategy: bow.GenStrategyRandom, MissingData: true, - Type: bow.Bool, + Type: bow.Boolean, }, bow.GenSeriesOptions{ Name: "String_bow2", diff --git a/bow_test.go b/bow_test.go index 2726447..26b4737 100644 --- a/bow_test.go +++ b/bow_test.go @@ -36,7 +36,7 @@ func TestNewBowFromColumnBasedInterface(t *testing.T) { expected, err := NewBowFromColBasedInterfaces( []string{"int", "float", "string", "bool"}, - []Type{Int64, Float64, String, Bool}, + []Type{Int64, Float64, String, Boolean}, [][]interface{}{ {10, 2}, {10., 2.}, @@ -297,7 +297,7 @@ func TestBow_AddCols(t *testing.T) { require.NoError(t, err) serieC := NewSeries("c", Int64, []int64{1, 2, 3, 4}, nil) serieD := NewSeries("d", String, []string{"one", "two", "three", "four"}, nil) - serieE := NewSeries("e", Bool, []bool{true, false, true, false}, nil) + serieE := NewSeries("e", Boolean, []bool{true, false, true, false}, nil) t.Run("empty", func(t *testing.T) { b := NewBowEmpty() @@ -320,7 +320,7 @@ func TestBow_AddCols(t *testing.T) { t.Run("valid series", func(t *testing.T) { expected, err := NewBowFromRowBasedInterfaces( []string{"time", "a", "b", "c", "d", "e"}, - []Type{Int64, Float64, Float64, Int64, String, Bool}, + []Type{Int64, Float64, Float64, Int64, String, Boolean}, [][]interface{}{ {1, 1.1, 2.1, 1, "one", true}, {2, 1.2, 2.2, 2, "two", false}, diff --git a/bowappend.go b/bowappend.go index 591877b..a287140 100644 --- a/bowappend.go +++ b/bowappend.go @@ -63,7 +63,7 @@ func AppendBows(bows ...Bow) (Bow, error) { builder.AppendValues(v, valid) } newArray = builder.NewArray() - case Bool: + case Boolean: builder := array.NewBooleanBuilder(mem) builder.Resize(numRows) for _, b := range bows { diff --git a/bowbuffer.go b/bowbuffer.go index 33f6b8b..cca4899 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -29,7 +29,7 @@ func NewBuffer(size int, typ Type) Buffer { res.Data = make([]int64, size) case Float64: res.Data = make([]float64, size) - case Bool: + case Boolean: res.Data = make([]bool, size) case String: res.Data = make([]string, size) @@ -47,7 +47,7 @@ func (b Buffer) Len() int { return len(b.Data.([]int64)) case Float64: return len(b.Data.([]float64)) - case Bool: + case Boolean: return len(b.Data.([]bool)) case String: return len(b.Data.([]string)) @@ -65,8 +65,8 @@ func (b *Buffer) SetOrDrop(i int, value interface{}) { b.Data.([]int64)[i], valid = Int64.Convert(value).(int64) case Float64: b.Data.([]float64)[i], valid = Float64.Convert(value).(float64) - case Bool: - b.Data.([]bool)[i], valid = Bool.Convert(value).(bool) + case Boolean: + b.Data.([]bool)[i], valid = Boolean.Convert(value).(bool) case String: b.Data.([]string)[i], valid = String.Convert(value).(string) case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: @@ -89,7 +89,7 @@ func (b *Buffer) SetOrDropStrict(i int, value interface{}) { b.Data.([]int64)[i], valid = value.(int64) case Float64: b.Data.([]float64)[i], valid = value.(float64) - case Bool: + case Boolean: b.Data.([]bool)[i], valid = value.(bool) case String: b.Data.([]string)[i], valid = value.(string) @@ -116,7 +116,7 @@ func (b *Buffer) GetValue(i int) interface{} { return b.Data.([]int64)[i] case Float64: return b.Data.([]float64)[i] - case Bool: + case Boolean: return b.Data.([]bool)[i] case String: return b.Data.([]string)[i] @@ -135,7 +135,7 @@ func (b Buffer) Less(i, j int) bool { return b.Data.([]float64)[i] < b.Data.([]float64)[j] case String: return b.Data.([]string)[i] < b.Data.([]string)[j] - case Bool: + case Boolean: return !b.Data.([]bool)[i] && b.Data.([]bool)[j] case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: return b.Data.([]arrow.Timestamp)[i] < b.Data.([]arrow.Timestamp)[j] @@ -162,7 +162,7 @@ func (b *bow) NewBufferFromCol(colIndex int) Buffer { copy(nullBitmapBytesCopy, nullBitmapBytes) res.Data = float64Values(arr) res.nullBitmapBytes = nullBitmapBytesCopy - case Bool: + case Boolean: arr := array.NewBooleanData(arrayData) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) diff --git a/bowconvert.go b/bowconvert.go index dbe8ab1..2726ed7 100644 --- a/bowconvert.go +++ b/bowconvert.go @@ -78,10 +78,10 @@ func ToFloat64(input interface{}) (output float64, ok bool) { return } -// ToBool attempts to convert `input` to bool. +// ToBoolean attempts to convert `input` to bool. // Return also a false boolean if the conversion failed. // In case of numeric type, returns true if the value is non-zero. -func ToBool(input interface{}) (output bool, ok bool) { +func ToBoolean(input interface{}) (output bool, ok bool) { switch input := input.(type) { case bool: return input, true diff --git a/bowconvert_test.go b/bowconvert_test.go index 2e53585..a2ea94a 100644 --- a/bowconvert_test.go +++ b/bowconvert_test.go @@ -13,41 +13,41 @@ func TestToBool(t *testing.T) { var v bool var ok bool - v, ok = ToBool(true) + v, ok = ToBoolean(true) require.True(t, ok) assert.Equal(t, true, v) - v, ok = ToBool(false) + v, ok = ToBoolean(false) require.True(t, ok) assert.Equal(t, false, v) - v, ok = ToBool("true") + v, ok = ToBoolean("true") require.True(t, ok) assert.Equal(t, true, v) - v, ok = ToBool("True") + v, ok = ToBoolean("True") require.True(t, ok) assert.Equal(t, true, v) - v, ok = ToBool("false") + v, ok = ToBoolean("false") require.True(t, ok) assert.Equal(t, false, v) - v, ok = ToBool("False") + v, ok = ToBoolean("False") require.True(t, ok) assert.Equal(t, false, v) - v, ok = ToBool(1) + v, ok = ToBoolean(1) require.True(t, v) require.True(t, ok) - v, ok = ToBool(0) + v, ok = ToBoolean(0) require.False(t, v) require.True(t, ok) - v, ok = ToBool(1.) + v, ok = ToBoolean(1.) require.True(t, v) require.True(t, ok) - v, ok = ToBool(0.) + v, ok = ToBoolean(0.) require.False(t, v) require.True(t, ok) } diff --git a/bowdiff.go b/bowdiff.go index 3951e63..6beed6f 100644 --- a/bowdiff.go +++ b/bowdiff.go @@ -19,7 +19,7 @@ func (b *bow) Diff(colIndices ...int) (Bow, error) { switch b.ColumnType(colIndex) { case Int64: case Float64: - case Bool: + case Boolean: default: return nil, fmt.Errorf( "column '%s' is of unsupported type '%v'", @@ -56,7 +56,7 @@ func (b *bow) Diff(colIndices ...int) (Bow, error) { currVal := colBuf.GetValue(rowIndex).(float64) prevVal := colBuf.GetValue(rowIndex - 1).(float64) calcBuf.SetOrDrop(rowIndex, currVal-prevVal) - case Bool: + case Boolean: currVal := colBuf.GetValue(rowIndex).(bool) prevVal := colBuf.GetValue(rowIndex - 1).(bool) calcBuf.SetOrDrop(rowIndex, currVal != prevVal) diff --git a/bowdiff_test.go b/bowdiff_test.go index a2acac9..38a5ca0 100644 --- a/bowdiff_test.go +++ b/bowdiff_test.go @@ -16,7 +16,7 @@ func TestDiff(t *testing.T) { NewSeries("b", Float64, []float64{1., 2., 3., 4., 0., 5.}, []bool{true, true, true, true, false, true}), - NewSeries("c", Bool, + NewSeries("c", Boolean, []bool{false, false, true, true, false, false}, []bool{true, true, true, true, false, true}), ) @@ -29,7 +29,7 @@ func TestDiff(t *testing.T) { NewSeries("b", Float64, []float64{0., 1., 1., 1., 0., 0.}, []bool{false, true, true, true, false, false}), - NewSeries("c", Bool, + NewSeries("c", Boolean, []bool{false, false, true, false, false, false}, []bool{false, true, true, true, false, false}), ) @@ -43,7 +43,7 @@ func TestDiff(t *testing.T) { t.Run("one column all supported types", func(t *testing.T) { b, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "c"}, - []Type{Int64, Float64, Bool}, + []Type{Int64, Float64, Boolean}, [][]interface{}{ {1, 1., false}, {2, 2., false}, @@ -53,7 +53,7 @@ func TestDiff(t *testing.T) { expected, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "c"}, - []Type{Int64, Float64, Bool}, + []Type{Int64, Float64, Boolean}, [][]interface{}{ {1, nil, false}, {2, 1., false}, diff --git a/bowfill.go b/bowfill.go index d168488..a96652a 100644 --- a/bowfill.go +++ b/bowfill.go @@ -219,7 +219,7 @@ func fill(method string, b *bow, colIndices ...int) (Bow, error) { buf.SetOrDropStrict(rowIndex, arr.Value(fillRowIndex)) } } - case Bool: + case Boolean: arr := array.NewBooleanData(data) for rowIndex := 0; rowIndex < b.NumRows(); rowIndex++ { if buf.IsValid(rowIndex) { diff --git a/bowfill_test.go b/bowfill_test.go index 9d316d2..cdd2f29 100644 --- a/bowfill_test.go +++ b/bowfill_test.go @@ -400,7 +400,7 @@ func TestFill(t *testing.T) { t.Run("Previous", func(t *testing.T) { b, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "c"}, - []Type{Int64, Bool, String}, + []Type{Int64, Boolean, String}, [][]interface{}{ {20, nil, "dgr"}, {13, false, "sfr"}, @@ -414,7 +414,7 @@ func TestFill(t *testing.T) { expected, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "c"}, - []Type{Int64, Bool, String}, + []Type{Int64, Boolean, String}, [][]interface{}{ {20, nil, "dgr"}, {13, false, "sfr"}, @@ -434,7 +434,7 @@ func TestFill(t *testing.T) { t.Run("Next", func(t *testing.T) { b, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "c"}, - []Type{Int64, Bool, String}, + []Type{Int64, Boolean, String}, [][]interface{}{ {20, nil, "dgr"}, {13, false, "sfr"}, @@ -448,7 +448,7 @@ func TestFill(t *testing.T) { expected, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "c"}, - []Type{Int64, Bool, String}, + []Type{Int64, Boolean, String}, [][]interface{}{ {20, false, "dgr"}, {13, false, "sfr"}, @@ -468,7 +468,7 @@ func TestFill(t *testing.T) { t.Run("Mean", func(t *testing.T) { b, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "c"}, - []Type{Int64, Bool, String}, + []Type{Int64, Boolean, String}, [][]interface{}{ {20, nil, "dgr"}, {13, false, "sfr"}, @@ -486,7 +486,7 @@ func TestFill(t *testing.T) { t.Run("Linear", func(t *testing.T) { b, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "c"}, - []Type{Int64, Bool, String}, + []Type{Int64, Boolean, String}, [][]interface{}{ {20, nil, "dgr"}, {13, false, "sfr"}, diff --git a/bowfind_test.go b/bowfind_test.go index 49cecc9..27c4976 100644 --- a/bowfind_test.go +++ b/bowfind_test.go @@ -16,7 +16,7 @@ var sortedTestBow, _ = NewBow( NewSeries(String.String(), String, []string{"0", "1", "0", "0"}, []bool{true, true, false, true}), - NewSeries(Bool.String(), Bool, + NewSeries(Boolean.String(), Boolean, []bool{false, true, false, false}, []bool{true, true, false, true}), ) diff --git a/bowgenerator.go b/bowgenerator.go index f1df2bd..7839829 100644 --- a/bowgenerator.go +++ b/bowgenerator.go @@ -126,7 +126,7 @@ func newRandomNumber(typ Type) interface{} { return n.Int64() case Float64: return float64(n.Int64()) + 0.5 - case Bool: + case Boolean: return n.Int64() > 5 case String: return uuid.New().String()[:8] diff --git a/bowgenerator_test.go b/bowgenerator_test.go index 6afabf5..d3f82ac 100644 --- a/bowgenerator_test.go +++ b/bowgenerator_test.go @@ -59,7 +59,7 @@ func TestGenerator(t *testing.T) { GenSeriesOptions{Name: "A", Type: Int64}, GenSeriesOptions{Name: "B", Type: Float64}, GenSeriesOptions{Name: "C", Type: String}, - GenSeriesOptions{Name: "D", Type: Bool}, + GenSeriesOptions{Name: "D", Type: Boolean}, ) assert.NoError(t, err) @@ -71,6 +71,6 @@ func TestGenerator(t *testing.T) { assert.Equal(t, Int64, b.ColumnType(0)) assert.Equal(t, Float64, b.ColumnType(1)) assert.Equal(t, String, b.ColumnType(2)) - assert.Equal(t, Bool, b.ColumnType(3)) + assert.Equal(t, Boolean, b.ColumnType(3)) }) } diff --git a/bowgetters.go b/bowgetters.go index b5dabf1..70844aa 100644 --- a/bowgetters.go +++ b/bowgetters.go @@ -53,7 +53,7 @@ func (b *bow) GetValue(colIndex, rowIndex int) interface{} { return array.NewFloat64Data(b.Column(colIndex).Data()).Value(rowIndex) case Int64: return array.NewInt64Data(b.Column(colIndex).Data()).Value(rowIndex) - case Bool: + case Boolean: return array.NewBooleanData(b.Column(colIndex).Data()).Value(rowIndex) case String: return array.NewStringData(b.Column(colIndex).Data()).Value(rowIndex) diff --git a/bowjoin.go b/bowjoin.go index 5a1ff87..1c49f99 100644 --- a/bowjoin.go +++ b/bowjoin.go @@ -205,7 +205,7 @@ func innerFillLeftBowCols(newSeries *[]Series, left *bow, newNumRows int, buf.SetOrDropStrict(rowIndex, data.Value(commonRows.l[rowIndex])) } } - case Bool: + case Boolean: data := array.NewBooleanData(left.Column(colIndex).Data()) for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { if data.IsValid(commonRows.l[rowIndex]) { @@ -260,7 +260,7 @@ func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, ne buf.SetOrDropStrict(rowIndex, data.Value(commonRows.r[rowIndex])) } } - case Bool: + case Boolean: data := array.NewBooleanData(right.Column(rightCol).Data()) for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { if data.IsValid(commonRows.r[rowIndex]) { @@ -345,7 +345,7 @@ func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uni break } } - case Bool: + case Boolean: data := array.NewBooleanData(left.Column(colIndex).Data()) for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { if data.IsValid(leftRow) { @@ -527,7 +527,7 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow++ } } - case Bool: + case Boolean: data := array.NewBooleanData(right.Column(rightCol).Data()) // Fill common rows from right bow diff --git a/bowjson_test.go b/bowjson_test.go index 349f770..de1b46f 100644 --- a/bowjson_test.go +++ b/bowjson_test.go @@ -31,7 +31,7 @@ func TestJSON(t *testing.T) { t.Run("simple", func(t *testing.T) { b, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "c"}, - []Type{Int64, Float64, Bool}, + []Type{Int64, Float64, Boolean}, [][]interface{}{ {100, 200., false}, {110, 220., true}, @@ -82,7 +82,7 @@ func TestJSON(t *testing.T) { t.Run("simple", func(t *testing.T) { b, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "c"}, - []Type{Int64, Float64, Bool}, + []Type{Int64, Float64, Boolean}, [][]interface{}{ {100, 200., false}, {110, 220., true}, @@ -104,7 +104,7 @@ func TestJSON(t *testing.T) { t.Run("simple no data", func(t *testing.T) { b, err := NewBowFromRowBasedInterfaces( []string{"a", "b", "c"}, - []Type{Int64, Float64, Bool}, + []Type{Int64, Float64, Boolean}, [][]interface{}{}) require.NoError(t, err) diff --git a/bowparquet_test.go b/bowparquet_test.go index 6900f72..d732287 100644 --- a/bowparquet_test.go +++ b/bowparquet_test.go @@ -35,7 +35,7 @@ func TestParquet(t *testing.T) { "timestamp_ms_int", "timestamp_ms_str", "timestamp_us_int", "timestamp_us_str", "timestamp_ns_int", "timestamp_ns_str"}, - []Type{Int64, Float64, Bool, String, + []Type{Int64, Float64, Boolean, String, TimestampMilli, TimestampMilli, TimestampMicro, TimestampMicro, TimestampNano, TimestampNano}, @@ -73,7 +73,7 @@ func TestParquet(t *testing.T) { t.Run("bow supported types without rows", func(t *testing.T) { bBefore, err := NewBowFromRowBasedInterfaces( []string{"int", "float", "bool", "string"}, - []Type{Int64, Float64, Bool, String}, + []Type{Int64, Float64, Boolean, String}, [][]interface{}{}) require.NoError(t, err) diff --git a/bowseries.go b/bowseries.go index 88a49d5..0b1e740 100644 --- a/bowseries.go +++ b/bowseries.go @@ -25,7 +25,7 @@ func NewSeries(name string, typ Type, dataArray interface{}, validityArray inter case Float64: return newFloat64Series(name, dataArray.([]float64), buildNullBitmapBytes(len(dataArray.([]float64)), validityArray)) - case Bool: + case Boolean: return newBooleanSeries(name, dataArray.([]bool), buildNullBitmapBytes(len(dataArray.([]bool)), validityArray)) case String: @@ -45,7 +45,7 @@ func NewSeriesFromBuffer(name string, buf Buffer) Series { return newInt64Series(name, buf.Data.([]int64), buf.nullBitmapBytes) case Float64: return newFloat64Series(name, buf.Data.([]float64), buf.nullBitmapBytes) - case Bool: + case Boolean: return newBooleanSeries(name, buf.Data.([]bool), buf.nullBitmapBytes) case String: return newStringSeries(name, buf.Data.([]string), buf.nullBitmapBytes) @@ -92,12 +92,12 @@ func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series builder.Append(v) } return Series{Name: name, Array: builder.NewArray()} - case Bool: + case Boolean: builder := array.NewBooleanBuilder(mem) defer builder.Release() builder.Resize(len(cells)) for i := 0; i < len(cells); i++ { - v, ok := ToBool(cells[i]) + v, ok := ToBoolean(cells[i]) if !ok { builder.AppendNull() continue @@ -224,7 +224,7 @@ func getBowTypeFromInterfaces(colBasedData []interface{}) (Type, error) { case string: return String, nil case bool: - return Bool, nil + return Boolean, nil } } } diff --git a/bowsort_test.go b/bowsort_test.go index 4d56f34..94aeea6 100644 --- a/bowsort_test.go +++ b/bowsort_test.go @@ -29,7 +29,7 @@ func TestBow_SortByCol(t *testing.T) { t.Run("unsorted with all types", func(t *testing.T) { b, err := NewBowFromRowBasedInterfaces( []string{"time", "i", "f", "b", "s"}, - []Type{Int64, Int64, Float64, Bool, String}, + []Type{Int64, Int64, Float64, Boolean, String}, [][]interface{}{ {10, 2, 3.1, true, "ho"}, {11, 2, 5.9, false, "la"}, @@ -39,7 +39,7 @@ func TestBow_SortByCol(t *testing.T) { require.NoError(t, err) expected, err := NewBowFromRowBasedInterfaces( []string{"time", "i", "f", "b", "s"}, - []Type{Int64, Int64, Float64, Bool, String}, + []Type{Int64, Int64, Float64, Boolean, String}, [][]interface{}{ {10, 2, 3.1, true, "ho"}, {11, 2, 5.9, false, "la"}, @@ -105,7 +105,7 @@ func TestBow_SortByCol(t *testing.T) { t.Run("unsorted with nil values and all types", func(t *testing.T) { b, err := NewBowFromRowBasedInterfaces( []string{"time", "int", "float", "string", "bool"}, - []Type{Int64, Int64, Float64, String, Bool}, + []Type{Int64, Int64, Float64, String, Boolean}, [][]interface{}{ {10, 5, nil, "bonjour", true}, {11, 2, 56., "comment", false}, @@ -115,7 +115,7 @@ func TestBow_SortByCol(t *testing.T) { require.NoError(t, err) expected, err := NewBowFromRowBasedInterfaces( []string{"time", "int", "float", "string", "bool"}, - []Type{Int64, Int64, Float64, String, Bool}, + []Type{Int64, Int64, Float64, String, Boolean}, [][]interface{}{ {10, 5, nil, "bonjour", true}, {11, 2, 56., "comment", false}, diff --git a/bowtypes.go b/bowtypes.go index 88cacb7..cef8a08 100644 --- a/bowtypes.go +++ b/bowtypes.go @@ -21,7 +21,7 @@ const ( // Float64 and following types are native arrow type supported by bow. Float64 Int64 - Bool + Boolean String TimestampSec TimestampMilli @@ -39,7 +39,7 @@ var ( mapBowToArrowDataTypes = map[Type]arrow.DataType{ Float64: arrow.PrimitiveTypes.Float64, Int64: arrow.PrimitiveTypes.Int64, - Bool: arrow.FixedWidthTypes.Boolean, + Boolean: arrow.FixedWidthTypes.Boolean, String: arrow.BinaryTypes.String, TimestampSec: arrow.FixedWidthTypes.Timestamp_s, TimestampMilli: arrow.FixedWidthTypes.Timestamp_ms, @@ -69,8 +69,8 @@ func (t Type) Convert(input interface{}) interface{} { output, ok = ToFloat64(input) case Int64: output, ok = ToInt64(input) - case Bool: - output, ok = ToBool(input) + case Boolean: + output, ok = ToBoolean(input) case String: output, ok = ToString(input) case TimestampSec: diff --git a/rolling/aggregation.go b/rolling/aggregation.go index f0872a2..376f5e4 100644 --- a/rolling/aggregation.go +++ b/rolling/aggregation.go @@ -109,7 +109,7 @@ func (a *colAggregation) SetTransformations(transformations ...transformation.Fu func (a *colAggregation) GetReturnType(inputType, iteratorType bow.Type) bow.Type { switch a.Type() { - case bow.Int64, bow.Float64, bow.Bool, bow.String: + case bow.Int64, bow.Float64, bow.Boolean, bow.String: return a.Type() case bow.InputDependent: return inputType diff --git a/rolling/aggregation/core_test.go b/rolling/aggregation/core_test.go index 15252ac..4ed684a 100644 --- a/rolling/aggregation/core_test.go +++ b/rolling/aggregation/core_test.go @@ -53,7 +53,7 @@ var ( }) sparseBoolBow, _ = bow.NewBowFromRowBasedInterfaces( []string{timeCol, valueCol}, - []bow.Type{bow.Int64, bow.Bool}, + []bow.Type{bow.Int64, bow.Boolean}, [][]interface{}{ {10, true}, // partially valid window {11, nil}, diff --git a/rolling/aggregation/firstlast_test.go b/rolling/aggregation/firstlast_test.go index ea13f33..ea7fb09 100644 --- a/rolling/aggregation/firstlast_test.go +++ b/rolling/aggregation/firstlast_test.go @@ -46,7 +46,7 @@ func TestFirst(t *testing.T) { expectedBow: func() bow.Bow { b, err := bow.NewBowFromRowBasedInterfaces( []string{"time", "value"}, - []bow.Type{bow.Int64, bow.Bool}, + []bow.Type{bow.Int64, bow.Boolean}, [][]interface{}{ {10, true}, {20, nil}, @@ -120,7 +120,7 @@ func TestLast(t *testing.T) { expectedBow: func() bow.Bow { b, err := bow.NewBowFromRowBasedInterfaces( []string{"time", "value"}, - []bow.Type{bow.Int64, bow.Bool}, + []bow.Type{bow.Int64, bow.Boolean}, [][]interface{}{ {10, true}, {20, nil}, diff --git a/rolling/aggregation/mode_test.go b/rolling/aggregation/mode_test.go index ffa017e..ef762a0 100644 --- a/rolling/aggregation/mode_test.go +++ b/rolling/aggregation/mode_test.go @@ -66,7 +66,7 @@ func TestMode(t *testing.T) { expectedBow: func() bow.Bow { b, err := bow.NewBowFromRowBasedInterfaces( []string{timeCol, valueCol}, - []bow.Type{bow.Int64, bow.Bool}, + []bow.Type{bow.Int64, bow.Boolean}, [][]interface{}{ {10, true}, {20, nil}, diff --git a/rolling/aggregation/whole_test.go b/rolling/aggregation/whole_test.go index 1bd9d49..5d2a11a 100644 --- a/rolling/aggregation/whole_test.go +++ b/rolling/aggregation/whole_test.go @@ -218,7 +218,7 @@ func TestAggregate(t *testing.T) { t.Run("bool", func(t *testing.T) { b, _ := bow.NewBowFromColBasedInterfaces( []string{timeCol, valueCol}, - []bow.Type{bow.Int64, bow.Bool}, + []bow.Type{bow.Int64, bow.Boolean}, [][]interface{}{ {10, 20, 30}, {true, true, false}, diff --git a/rolling/interpolation/linear_test.go b/rolling/interpolation/linear_test.go index 6eab3ce..3d0e11b 100644 --- a/rolling/interpolation/linear_test.go +++ b/rolling/interpolation/linear_test.go @@ -101,7 +101,7 @@ func TestLinear(t *testing.T) { }) t.Run("bool error", func(t *testing.T) { - b, err := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Bool}, [][]interface{}{ + b, err := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Boolean}, [][]interface{}{ {10, 15}, {true, false}, }) diff --git a/rolling/interpolation/none.go b/rolling/interpolation/none.go index fc396dd..1372064 100644 --- a/rolling/interpolation/none.go +++ b/rolling/interpolation/none.go @@ -6,7 +6,7 @@ import ( ) func None(colName string) rolling.ColInterpolation { - return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64, bow.Float64, bow.Bool}, + return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64, bow.Float64, bow.Boolean}, func(colIndexToFill int, w rolling.Window, fullBow, prevRow bow.Bow) (interface{}, error) { return nil, nil }, diff --git a/rolling/interpolation/stepprevious.go b/rolling/interpolation/stepprevious.go index bed191b..fca01ca 100644 --- a/rolling/interpolation/stepprevious.go +++ b/rolling/interpolation/stepprevious.go @@ -7,7 +7,7 @@ import ( func StepPrevious(colName string) rolling.ColInterpolation { var prevVal interface{} - return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64, bow.Float64, bow.Bool, bow.String}, + return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64, bow.Float64, bow.Boolean, bow.String}, func(colIndexToFill int, w rolling.Window, fullBow, prevRow bow.Bow) (interface{}, error) { // For the first window, add the previous row to interpolate correctly if w.FirstIndex == 0 && prevRow != nil { diff --git a/rolling/interpolation/stepprevious_test.go b/rolling/interpolation/stepprevious_test.go index 0d79cf9..0642542 100644 --- a/rolling/interpolation/stepprevious_test.go +++ b/rolling/interpolation/stepprevious_test.go @@ -34,7 +34,7 @@ func TestStepPrevious(t *testing.T) { }) t.Run("bool", func(t *testing.T) { - b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Bool}, [][]interface{}{ + b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Boolean}, [][]interface{}{ {10, 13}, {true, false}, }) @@ -42,7 +42,7 @@ func TestStepPrevious(t *testing.T) { filled, err := r. Interpolate(WindowStart(timeCol), StepPrevious(valueCol)). Bow() - expected, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Bool}, [][]interface{}{ + expected, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Boolean}, [][]interface{}{ {10, 12, 13}, {true, true, false}, }) diff --git a/rolling/interpolation_test.go b/rolling/interpolation_test.go index b3e8cbc..1d2591f 100644 --- a/rolling/interpolation_test.go +++ b/rolling/interpolation_test.go @@ -24,7 +24,7 @@ func TestIntervalRollingIter_Interpolate(t *testing.T) { {1.0, 1.3}, }) r, _ := IntervalRolling(b, timeCol, 2, Options{}) - interp := NewColInterpolation(valueCol, []bow.Type{bow.Int64, bow.Bool}, + interp := NewColInterpolation(valueCol, []bow.Type{bow.Int64, bow.Boolean}, func(colIndex int, w Window, full, prevRow bow.Bow) (interface{}, error) { return true, nil }) From 15bf18fb34d2280ad4d3a56a58d60bf05a0cc83b Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 28 Apr 2022 14:14:45 +0200 Subject: [PATCH 11/29] Buffer --- bowappend.go | 8 ++++---- bowbuffer.go | 32 +++++++++++--------------------- bowvalues.go | 8 ++++---- 3 files changed, 19 insertions(+), 29 deletions(-) diff --git a/bowappend.go b/bowappend.go index afd2375..4dcd2d9 100644 --- a/bowappend.go +++ b/bowappend.go @@ -42,7 +42,7 @@ func AppendBows(bows ...Bow) (Bow, error) { } data := b.(*bow).Column(colIndex).Data() arr := array.NewInt64Data(data) - v := Int64Values(arr) + v := int64Values(arr) valid := getValiditySlice(arr) builder.AppendValues(v, valid) } @@ -56,7 +56,7 @@ func AppendBows(bows ...Bow) (Bow, error) { } data := b.(*bow).Column(colIndex).Data() arr := array.NewFloat64Data(data) - v := Float64Values(arr) + v := float64Values(arr) valid := getValiditySlice(arr) builder.AppendValues(v, valid) } @@ -70,7 +70,7 @@ func AppendBows(bows ...Bow) (Bow, error) { } data := b.(*bow).Column(colIndex).Data() arr := array.NewBooleanData(data) - v := BooleanValues(arr) + v := booleanValues(arr) valid := getValiditySlice(arr) builder.AppendValues(v, valid) } @@ -84,7 +84,7 @@ func AppendBows(bows ...Bow) (Bow, error) { } data := b.(*bow).Column(colIndex).Data() arr := array.NewStringData(data) - v := StringValues(arr) + v := stringValues(arr) valid := getValiditySlice(arr) builder.AppendValues(v, valid) } diff --git a/bowbuffer.go b/bowbuffer.go index 2ac4eb5..1e0dcea 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -18,30 +18,20 @@ type Buffer struct { // NewBuffer returns a new Buffer of size `size` and Type `typ`. func NewBuffer(size int, typ Type) Buffer { + buf := Buffer{nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8)} switch typ { case Int64: - return Buffer{ - Data: make([]int64, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } + buf.Data = make([]int64, size) case Float64: - return Buffer{ - Data: make([]float64, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } + buf.Data = make([]float64, size) case Boolean: - return Buffer{ - Data: make([]bool, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } + buf.Data = make([]bool, size) case String: - return Buffer{ - Data: make([]string, size), - nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), - } + buf.Data = make([]string, size) default: - panic(fmt.Errorf("unsupported type %s", typ)) + panic(fmt.Errorf("unsupported type '%s'", typ)) } + return buf } // Len returns the length of the Buffer @@ -153,7 +143,7 @@ func (b *bow) NewBufferFromCol(colIndex int) Buffer { nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) return Buffer{ - Data: Int64Values(arr), + Data: int64Values(arr), nullBitmapBytes: nullBitmapBytesCopy, } case Float64: @@ -162,7 +152,7 @@ func (b *bow) NewBufferFromCol(colIndex int) Buffer { nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) return Buffer{ - Data: Float64Values(arr), + Data: float64Values(arr), nullBitmapBytes: nullBitmapBytesCopy, } case Boolean: @@ -171,7 +161,7 @@ func (b *bow) NewBufferFromCol(colIndex int) Buffer { nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) return Buffer{ - Data: BooleanValues(arr), + Data: booleanValues(arr), nullBitmapBytes: nullBitmapBytesCopy, } case String: @@ -180,7 +170,7 @@ func (b *bow) NewBufferFromCol(colIndex int) Buffer { nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) return Buffer{ - Data: StringValues(arr), + Data: stringValues(arr), nullBitmapBytes: nullBitmapBytesCopy, } default: diff --git a/bowvalues.go b/bowvalues.go index baf7a4d..e315180 100644 --- a/bowvalues.go +++ b/bowvalues.go @@ -2,15 +2,15 @@ package bow import "github.com/apache/arrow/go/v8/arrow/array" -func Int64Values(arr *array.Int64) []int64 { +func int64Values(arr *array.Int64) []int64 { return arr.Int64Values() } -func Float64Values(arr *array.Float64) []float64 { +func float64Values(arr *array.Float64) []float64 { return arr.Float64Values() } -func BooleanValues(arr *array.Boolean) []bool { +func booleanValues(arr *array.Boolean) []bool { var res = make([]bool, arr.Len()) for i := range res { res[i] = arr.Value(i) @@ -18,7 +18,7 @@ func BooleanValues(arr *array.Boolean) []bool { return res } -func StringValues(arr *array.String) []string { +func stringValues(arr *array.String) []string { var res = make([]string, arr.Len()) for i := range res { res[i] = arr.Value(i) From f016a29dd8283ab4be691e312dfb8540402bed9d Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 28 Apr 2022 14:54:35 +0200 Subject: [PATCH 12/29] first --- XXXexamples_test.go | 6 +- bow_test.go | 38 ++--- bowappend_test.go | 20 +-- bowbuffer.go | 169 ++++++++++--------- bowdiff_test.go | 12 +- bowfill_test.go | 32 ++-- bowfind_test.go | 8 +- bowgetters.go | 2 +- bowgetters_test.go | 6 +- bowjoin_test.go | 80 ++++----- bowjson.go | 15 +- bowmetadata_test.go | 2 +- bowparquet_test.go | 12 +- bowrecord.go | 18 ++- bowseries.go | 178 ++++++++++----------- bowseries_test.go | 2 +- bowsetters_test.go | 24 +-- bowsort_test.go | 8 +- bowtypes.go | 63 +++----- go.mod | 2 +- rolling/aggregation/XXXbenchmarks_test.go | 4 +- rolling/aggregation/arithmeticmean_test.go | 4 +- rolling/aggregation/core_test.go | 4 +- rolling/aggregation/count_test.go | 4 +- rolling/aggregation/firstlast_test.go | 8 +- rolling/aggregation/integral_test.go | 12 +- rolling/aggregation/minmax_test.go | 8 +- rolling/aggregation/mode_test.go | 4 +- rolling/aggregation/sum_test.go | 4 +- rolling/aggregation/weightedmean_test.go | 8 +- 30 files changed, 374 insertions(+), 383 deletions(-) diff --git a/XXXexamples_test.go b/XXXexamples_test.go index ebf00ac..45ee31f 100644 --- a/XXXexamples_test.go +++ b/XXXexamples_test.go @@ -8,9 +8,9 @@ import ( func ExampleNewBow() { b, err := NewBow( - NewSeries("col1", []int64{1, 2, 3, 4}, nil), - NewSeries("col2", []float64{1.1, 2.2, 3.3, 4}, []bool{true, false, true, true}), - NewSeries("col3", []bool{true, false, true, false}, []bool{true, false, true, true}), + NewSeries("col1", Int64, []int64{1, 2, 3, 4}, nil), + NewSeries("col2", Float64, []float64{1.1, 2.2, 3.3, 4}, []bool{true, false, true, true}), + NewSeries("col3", Boolean, []bool{true, false, true, false}, []bool{true, false, true, true}), ) if err != nil { panic(err) diff --git a/bow_test.go b/bow_test.go index 932df90..26b4737 100644 --- a/bow_test.go +++ b/bow_test.go @@ -51,15 +51,15 @@ func TestNewBowFromColumnBasedInterface(t *testing.T) { func TestBow_NewSlice(t *testing.T) { origin, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{1, 2, 3}, nil), - NewSeries("value", []float64{.1, .2, .3}, nil), + NewSeries("time", Int64, []int64{1, 2, 3}, nil), + NewSeries("value", Float64, []float64{.1, .2, .3}, nil), ) require.NoError(t, err) // begin expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{1}, nil), - NewSeries("value", []float64{.1}, nil), + NewSeries("time", Int64, []int64{1}, nil), + NewSeries("value", Float64, []float64{.1}, nil), ) require.NoError(t, err) @@ -69,8 +69,8 @@ func TestBow_NewSlice(t *testing.T) { // end expected, err = NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{2, 3}, nil), - NewSeries("value", []float64{.2, .3}, nil), + NewSeries("time", Int64, []int64{2, 3}, nil), + NewSeries("value", Float64, []float64{.2, .3}, nil), ) require.NoError(t, err) @@ -80,8 +80,8 @@ func TestBow_NewSlice(t *testing.T) { // empty on already sliced bow (recursive test) expected, err = NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{}, nil), - NewSeries("value", []float64{}, nil), + NewSeries("time", Int64, []int64{}, nil), + NewSeries("value", Float64, []float64{}, nil), ) require.NoError(t, err) @@ -145,13 +145,13 @@ func TestBow_Select(t *testing.T) { t.Run("with metadata", func(t *testing.T) { b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{1, 2, 3}, []bool{true, false, true}), - NewSeries("value", []float64{1, 2, 3}, []bool{true, false, true}), + NewSeries("time", Int64, []int64{1, 2, 3}, []bool{true, false, true}), + NewSeries("value", Float64, []float64{1, 2, 3}, []bool{true, false, true}), ) require.NoError(t, err) expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{1, 2, 3}, []bool{true, false, true}), + NewSeries("time", Int64, []int64{1, 2, 3}, []bool{true, false, true}), ) require.NoError(t, err) @@ -266,14 +266,14 @@ func TestBow_DropNils(t *testing.T) { t.Run("with metadata", func(t *testing.T) { b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{1, 2, 3}, []bool{true, false, true}), - NewSeries("value", []float64{1, 2, 3}, []bool{true, false, true}), + NewSeries("time", Int64, []int64{1, 2, 3}, []bool{true, false, true}), + NewSeries("value", Float64, []float64{1, 2, 3}, []bool{true, false, true}), ) require.NoError(t, err) expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{1, 3}, nil), - NewSeries("value", []float64{1, 3}, nil), + NewSeries("time", Int64, []int64{1, 3}, nil), + NewSeries("value", Float64, []float64{1, 3}, nil), ) require.NoError(t, err) @@ -295,9 +295,9 @@ func TestBow_AddCols(t *testing.T) { {4, 1.4, 2.4}, }) require.NoError(t, err) - serieC := NewSeries("c", []int64{1, 2, 3, 4}, nil) - serieD := NewSeries("d", []string{"one", "two", "three", "four"}, nil) - serieE := NewSeries("e", []bool{true, false, true, false}, nil) + serieC := NewSeries("c", Int64, []int64{1, 2, 3, 4}, nil) + serieD := NewSeries("d", String, []string{"one", "two", "three", "four"}, nil) + serieE := NewSeries("e", Boolean, []bool{true, false, true, false}, nil) t.Run("empty", func(t *testing.T) { b := NewBowEmpty() @@ -335,7 +335,7 @@ func TestBow_AddCols(t *testing.T) { }) t.Run("column name already exists", func(t *testing.T) { - _, err := bow1.AddCols(NewSeries("a", []int64{1, 2, 3, 4}, nil)) + _, err := bow1.AddCols(NewSeries("a", Int64, []int64{1, 2, 3, 4}, nil)) require.Error(t, err) assert.Contains(t, err.Error(), "already exists") }) diff --git a/bowappend_test.go b/bowappend_test.go index 9f26341..bc943c8 100644 --- a/bowappend_test.go +++ b/bowappend_test.go @@ -129,20 +129,20 @@ func TestAppendBows(t *testing.T) { t.Run("2 bows with the same metadata", func(t *testing.T) { b1, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{1, 2}, nil), - NewSeries("value", []float64{.1, .2}, nil), + NewSeries("time", Int64, []int64{1, 2}, nil), + NewSeries("value", Float64, []float64{.1, .2}, nil), ) require.NoError(t, err) b2, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{3, 4}, nil), - NewSeries("value", []float64{.3, .4}, nil), + NewSeries("time", Int64, []int64{3, 4}, nil), + NewSeries("value", Float64, []float64{.3, .4}, nil), ) require.NoError(t, err) expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{1, 2, 3, 4}, nil), - NewSeries("value", []float64{.1, .2, .3, .4}, nil), + NewSeries("time", Int64, []int64{1, 2, 3, 4}, nil), + NewSeries("value", Float64, []float64{.1, .2, .3, .4}, nil), ) require.NoError(t, err) @@ -178,13 +178,13 @@ func TestAppendBows(t *testing.T) { func BenchmarkAppendBows(b *testing.B) { for rows := 10; rows <= 100000; rows *= 10 { b1, err := NewBow( - NewSeries("time", make([]int64, rows), nil), - NewSeries("value", make([]float64, rows), nil)) + NewSeries("time", Int64, make([]int64, rows), nil), + NewSeries("value", Float64, make([]float64, rows), nil)) require.NoError(b, err) b2, err := NewBow( - NewSeries("time", make([]int64, rows), nil), - NewSeries("value", make([]float64, rows), nil)) + NewSeries("time", Int64, make([]int64, rows), nil), + NewSeries("value", Float64, make([]float64, rows), nil)) require.NoError(b, err) b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) { diff --git a/bowbuffer.go b/bowbuffer.go index 1e0dcea..b94c6d3 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -10,15 +10,20 @@ import ( // Buffer is a mutable data structure with the purpose of easily building data Series with: // - Data: slice of data. +// - DataType: type of the data. // - nullBitmapBytes: slice of bytes representing valid or null values. type Buffer struct { Data interface{} + DataType Type nullBitmapBytes []byte } // NewBuffer returns a new Buffer of size `size` and Type `typ`. func NewBuffer(size int, typ Type) Buffer { - buf := Buffer{nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8)} + buf := Buffer{ + DataType: typ, + nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), + } switch typ { case Int64: buf.Data = make([]int64, size) @@ -34,37 +39,34 @@ func NewBuffer(size int, typ Type) Buffer { return buf } -// Len returns the length of the Buffer func (b Buffer) Len() int { - switch data := b.Data.(type) { - case []int64: - return len(data) - case []float64: - return len(data) - case []bool: - return len(data) - case []string: - return len(data) + switch b.DataType { + case Int64: + return len(b.Data.([]int64)) + case Float64: + return len(b.Data.([]float64)) + case Boolean: + return len(b.Data.([]bool)) + case String: + return len(b.Data.([]string)) default: - panic(fmt.Errorf("unsupported type '%T'", b.Data)) + panic(fmt.Errorf("unsupported type '%s'", b.DataType)) } } -// SetOrDrop sets the value `value` at index `i` by attempting a type conversion to the Buffer Type. -// Set the bit in the Buffer nullBitmapBytes if the conversion succeeded, or clear it otherwise. func (b *Buffer) SetOrDrop(i int, value interface{}) { var valid bool - switch v := b.Data.(type) { - case []int64: - v[i], valid = Int64.Convert(value).(int64) - case []float64: - v[i], valid = Float64.Convert(value).(float64) - case []bool: - v[i], valid = Boolean.Convert(value).(bool) - case []string: - v[i], valid = String.Convert(value).(string) + switch b.DataType { + case Int64: + b.Data.([]int64)[i], valid = Int64.Convert(value).(int64) + case Float64: + b.Data.([]float64)[i], valid = Float64.Convert(value).(float64) + case Boolean: + b.Data.([]bool)[i], valid = Boolean.Convert(value).(bool) + case String: + b.Data.([]string)[i], valid = String.Convert(value).(string) default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%s'", b.DataType)) } if valid { @@ -74,21 +76,19 @@ func (b *Buffer) SetOrDrop(i int, value interface{}) { } } -// SetOrDropStrict sets the value `value` at index `i` by attempting a type assertion to the Buffer Type. -// Set the bit in the Buffer nullBitmapBytes if the type assertion succeeded, or clear it otherwise. func (b *Buffer) SetOrDropStrict(i int, value interface{}) { var valid bool - switch v := b.Data.(type) { - case []int64: - v[i], valid = value.(int64) - case []float64: - v[i], valid = value.(float64) - case []bool: - v[i], valid = value.(bool) - case []string: - v[i], valid = value.(string) + switch b.DataType { + case Int64: + b.Data.([]int64)[i], valid = value.(int64) + case Float64: + b.Data.([]float64)[i], valid = value.(float64) + case Boolean: + b.Data.([]bool)[i], valid = value.(bool) + case String: + b.Data.([]string)[i], valid = value.(string) default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%s'", b.DataType)) } if valid { @@ -98,113 +98,106 @@ func (b *Buffer) SetOrDropStrict(i int, value interface{}) { } } -// GetValue gets the value at index `i` from the Buffer func (b *Buffer) GetValue(i int) interface{} { if bitutil.BitIsNotSet(b.nullBitmapBytes, i) { return nil } - switch v := b.Data.(type) { - case []int64: - return v[i] - case []float64: - return v[i] - case []bool: - return v[i] - case []string: - return v[i] + + switch b.DataType { + case Int64: + return b.Data.([]int64)[i] + case Float64: + return b.Data.([]float64)[i] + case Boolean: + return b.Data.([]bool)[i] + case String: + return b.Data.([]string)[i] default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%s'", b.DataType)) } } -// Less returns whether the value at index `i` is less that the value at index `j`. func (b Buffer) Less(i, j int) bool { - switch v := b.Data.(type) { - case []int64: - return v[i] < v[j] - case []float64: - return v[i] < v[j] - case []string: - return v[i] < v[j] - case []bool: - return !v[i] && v[j] + switch b.DataType { + case Int64: + return b.Data.([]int64)[i] < b.Data.([]int64)[j] + case Float64: + return b.Data.([]float64)[i] < b.Data.([]float64)[j] + case String: + return b.Data.([]string)[i] < b.Data.([]string)[j] + case Boolean: + return !b.Data.([]bool)[i] && b.Data.([]bool)[j] default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%s'", b.DataType)) } } -// NewBufferFromCol returns a new Buffer created from the column at index `colIndex`. func (b *bow) NewBufferFromCol(colIndex int) Buffer { - data := b.Column(colIndex).Data() + res := Buffer{DataType: b.ColumnType(colIndex)} + arrayData := b.Column(colIndex).Data() switch b.ColumnType(colIndex) { case Int64: - arr := array.NewInt64Data(data) + arr := array.NewInt64Data(arrayData) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: int64Values(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } + res.Data = int64Values(arr) + res.nullBitmapBytes = nullBitmapBytesCopy case Float64: - arr := array.NewFloat64Data(data) + arr := array.NewFloat64Data(arrayData) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: float64Values(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } + res.Data = float64Values(arr) + res.nullBitmapBytes = nullBitmapBytesCopy case Boolean: - arr := array.NewBooleanData(data) + arr := array.NewBooleanData(arrayData) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: booleanValues(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } + res.Data = booleanValues(arr) + res.nullBitmapBytes = nullBitmapBytesCopy case String: - arr := array.NewStringData(data) + arr := array.NewStringData(arrayData) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) - return Buffer{ - Data: stringValues(arr), - nullBitmapBytes: nullBitmapBytesCopy, - } + res.Data = stringValues(arr) + res.nullBitmapBytes = nullBitmapBytesCopy default: - panic(fmt.Errorf( - "unsupported type %+v", b.ColumnType(colIndex))) + panic(fmt.Errorf("unsupported type '%s'", b.ColumnType(colIndex))) } + + return res } + func buildNullBitmapBytes(dataLength int, validityArray interface{}) []byte { var res []byte nullBitmapLength := bitutil.CeilByte(dataLength) / 8 - switch valid := validityArray.(type) { + switch validityArray := validityArray.(type) { case nil: res = make([]byte, nullBitmapLength) for i := 0; i < dataLength; i++ { bitutil.SetBit(res, i) } case []bool: - if len(valid) != dataLength { + if len(validityArray) != dataLength { panic(fmt.Errorf("dataArray and validityArray have different lengths")) } res = make([]byte, nullBitmapLength) for i := 0; i < dataLength; i++ { - if valid[i] { + if validityArray[i] { bitutil.SetBit(res, i) } } case []byte: - if len(valid) != nullBitmapLength { + if len(validityArray) != nullBitmapLength { panic(fmt.Errorf("dataArray and validityArray have different lengths")) } - return valid + return validityArray default: - panic(fmt.Errorf("unsupported type %T", valid)) + panic(fmt.Errorf("unsupported type '%T'", validityArray)) } return res diff --git a/bowdiff_test.go b/bowdiff_test.go index d617d7e..38a5ca0 100644 --- a/bowdiff_test.go +++ b/bowdiff_test.go @@ -10,26 +10,26 @@ import ( func TestDiff(t *testing.T) { t.Run("all columns all supported types with nils and metadata", func(t *testing.T) { b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("a", + NewSeries("a", Int64, []int64{1, 2, 3, 4, 0, 5}, []bool{true, true, true, true, false, true}), - NewSeries("b", + NewSeries("b", Float64, []float64{1., 2., 3., 4., 0., 5.}, []bool{true, true, true, true, false, true}), - NewSeries("c", + NewSeries("c", Boolean, []bool{false, false, true, true, false, false}, []bool{true, true, true, true, false, true}), ) require.NoError(t, err) expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("a", + NewSeries("a", Int64, []int64{0, 1, 1, 1, 0, 0}, []bool{false, true, true, true, false, false}), - NewSeries("b", + NewSeries("b", Float64, []float64{0., 1., 1., 1., 0., 0.}, []bool{false, true, true, true, false, false}), - NewSeries("c", + NewSeries("c", Boolean, []bool{false, false, true, false, false, false}, []bool{false, true, true, true, false, false}), ) diff --git a/bowfill_test.go b/bowfill_test.go index 16e2243..cdd2f29 100644 --- a/bowfill_test.go +++ b/bowfill_test.go @@ -506,13 +506,13 @@ func TestFill(t *testing.T) { t.Run("with metadata", func(t *testing.T) { // Previous b1, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("int", []int64{1, 0, 3}, []bool{true, false, true}), - NewSeries("float", []float64{1., 0., 3.}, []bool{true, false, true}), + NewSeries("int", Int64, []int64{1, 0, 3}, []bool{true, false, true}), + NewSeries("float", Float64, []float64{1., 0., 3.}, []bool{true, false, true}), ) require.NoError(t, err) expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("int", []int64{1, 1, 3}, []bool{true, true, true}), - NewSeries("float", []float64{1., 1., 3.}, []bool{true, true, true}), + NewSeries("int", Int64, []int64{1, 1, 3}, []bool{true, true, true}), + NewSeries("float", Float64, []float64{1., 1., 3.}, []bool{true, true, true}), ) require.NoError(t, err) @@ -522,13 +522,13 @@ func TestFill(t *testing.T) { // Next b2, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("int", []int64{1, 0, 3}, []bool{true, false, true}), - NewSeries("float", []float64{1., 0., 3.}, []bool{true, false, true}), + NewSeries("int", Int64, []int64{1, 0, 3}, []bool{true, false, true}), + NewSeries("float", Float64, []float64{1., 0., 3.}, []bool{true, false, true}), ) require.NoError(t, err) expected, err = NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("int", []int64{1, 3, 3}, []bool{true, true, true}), - NewSeries("float", []float64{1., 3., 3.}, []bool{true, true, true}), + NewSeries("int", Int64, []int64{1, 3, 3}, []bool{true, true, true}), + NewSeries("float", Float64, []float64{1., 3., 3.}, []bool{true, true, true}), ) require.NoError(t, err) @@ -538,13 +538,13 @@ func TestFill(t *testing.T) { // Mean b3, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("int", []int64{1, 0, 3}, []bool{true, false, true}), - NewSeries("float", []float64{1., 0., 3.}, []bool{true, false, true}), + NewSeries("int", Int64, []int64{1, 0, 3}, []bool{true, false, true}), + NewSeries("float", Float64, []float64{1., 0., 3.}, []bool{true, false, true}), ) require.NoError(t, err) expected, err = NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("int", []int64{1, 2, 3}, []bool{true, true, true}), - NewSeries("float", []float64{1., 2., 3.}, []bool{true, true, true}), + NewSeries("int", Int64, []int64{1, 2, 3}, []bool{true, true, true}), + NewSeries("float", Float64, []float64{1., 2., 3.}, []bool{true, true, true}), ) require.NoError(t, err) @@ -554,13 +554,13 @@ func TestFill(t *testing.T) { // Linear b4, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("int", []int64{1, 0, 3}, []bool{true, false, true}), - NewSeries("float", []float64{1., 0., 3.}, []bool{true, false, true}), + NewSeries("int", Int64, []int64{1, 0, 3}, []bool{true, false, true}), + NewSeries("float", Float64, []float64{1., 0., 3.}, []bool{true, false, true}), ) require.NoError(t, err) expected, err = NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("int", []int64{1, 2, 3}, []bool{true, false, true}), - NewSeries("float", []float64{1., 2., 3.}, []bool{true, false, true}), + NewSeries("int", Int64, []int64{1, 2, 3}, []bool{true, false, true}), + NewSeries("float", Float64, []float64{1., 2., 3.}, []bool{true, false, true}), ) require.NoError(t, err) diff --git a/bowfind_test.go b/bowfind_test.go index 12d119c..27c4976 100644 --- a/bowfind_test.go +++ b/bowfind_test.go @@ -7,16 +7,16 @@ import ( ) var sortedTestBow, _ = NewBow( - NewSeries(Int64.String(), + NewSeries(Int64.String(), Int64, []int64{0, 1, 0, 0}, []bool{true, true, false, true}), - NewSeries(Float64.String(), + NewSeries(Float64.String(), Float64, []float64{0., 1., 0., 0.}, []bool{true, true, false, true}), - NewSeries(String.String(), + NewSeries(String.String(), String, []string{"0", "1", "0", "0"}, []bool{true, true, false, true}), - NewSeries(Boolean.String(), + NewSeries(Boolean.String(), Boolean, []bool{false, true, false, false}, []bool{true, true, false, true}), ) diff --git a/bowgetters.go b/bowgetters.go index a123795..b0c4035 100644 --- a/bowgetters.go +++ b/bowgetters.go @@ -312,7 +312,7 @@ func (b *bow) GetNextFloat64s(colIndex1, colIndex2, rowIndex int) (float64, floa // ColumnType returns the Bow type from the column `colIndex`. func (b *bow) ColumnType(colIndex int) Type { - return getBowTypeFromArrowType(b.Schema().Field(colIndex).Type) + return getBowTypeFromArrowFingerprint(b.Schema().Field(colIndex).Type.Fingerprint()) } // ColumnIndex returns the index of the column with the name `colName`, and an error. diff --git a/bowgetters_test.go b/bowgetters_test.go index 2a59a67..5b5f2bf 100644 --- a/bowgetters_test.go +++ b/bowgetters_test.go @@ -45,7 +45,7 @@ func TestBow_Distinct(t *testing.T) { t.Run(Int64.String(), func(t *testing.T) { res := b.Distinct(0) - expect, err := NewBow(NewSeries("time", []int64{1, 2, 3}, nil)) + expect, err := NewBow(NewSeries("time", Int64, []int64{1, 2, 3}, nil)) require.NoError(t, err) ExpectEqual(t, expect, res) @@ -53,7 +53,7 @@ func TestBow_Distinct(t *testing.T) { t.Run(Float64.String(), func(t *testing.T) { res := b.Distinct(1) - expect, err := NewBow(NewSeries("value", []float64{1.1, 2.2, 3.3}, nil)) + expect, err := NewBow(NewSeries("value", Float64, []float64{1.1, 2.2, 3.3}, nil)) require.NoError(t, err) ExpectEqual(t, expect, res) @@ -61,7 +61,7 @@ func TestBow_Distinct(t *testing.T) { t.Run(String.String(), func(t *testing.T) { res := b.Distinct(2) - expect, err := NewBow(NewSeries("meta", []string{"", "3.3", "test"}, nil)) + expect, err := NewBow(NewSeries("meta", String, []string{"", "3.3", "test"}, nil)) require.NoError(t, err) ExpectEqual(t, expect, res) diff --git a/bowjoin_test.go b/bowjoin_test.go index b9ff019..1c7938b 100644 --- a/bowjoin_test.go +++ b/bowjoin_test.go @@ -60,9 +60,9 @@ func TestBow_OuterJoin(t *testing.T) { t.Run("left and right without rows", func(t *testing.T) { b1, err := NewBow( - NewSeries("index1", []int64{}, nil), - NewSeries("index2", []float64{}, nil), - NewSeries("col1", []int64{}, nil), + NewSeries("index1", Int64, []int64{}, nil), + NewSeries("index2", Float64, []float64{}, nil), + NewSeries("col1", Int64, []int64{}, nil), ) require.NoError(t, err) @@ -110,22 +110,22 @@ func TestBow_OuterJoin(t *testing.T) { t.Run("left and right bow without rows", func(t *testing.T) { b1, err := NewBow( - NewSeries("index1", []int64{}, nil), - NewSeries("index2", []float64{}, nil), - NewSeries("col1", []int64{}, nil), + NewSeries("index1", Int64, []int64{}, nil), + NewSeries("index2", Float64, []float64{}, nil), + NewSeries("col1", Int64, []int64{}, nil), ) require.NoError(t, err) b2, err := NewBow( - NewSeries("index1", []int64{}, nil), - NewSeries("index2", []float64{}, nil), - NewSeries("col2", []int64{}, nil), + NewSeries("index1", Int64, []int64{}, nil), + NewSeries("index2", Float64, []float64{}, nil), + NewSeries("col2", Int64, []int64{}, nil), ) require.NoError(t, err) expected, err := NewBow( - NewSeries("index1", []int64{}, nil), - NewSeries("index2", []float64{}, nil), - NewSeries("col1", []int64{}, nil), - NewSeries("col2", []int64{}, nil), + NewSeries("index1", Int64, []int64{}, nil), + NewSeries("index2", Float64, []float64{}, nil), + NewSeries("col1", Int64, []int64{}, nil), + NewSeries("col2", Int64, []int64{}, nil), ) require.NoError(t, err) @@ -397,17 +397,17 @@ func TestBow_OuterJoin(t *testing.T) { t.Run("with metadata", func(t *testing.T) { b1, err := NewBowWithMetadata(NewMetadata([]string{"k1"}, []string{"v1"}), - NewSeries("index1", []int64{1}, nil), + NewSeries("index1", Int64, []int64{1}, nil), ) require.NoError(t, err) b2, err := NewBowWithMetadata(NewMetadata([]string{"k2"}, []string{"v2"}), - NewSeries("index1", []int64{1}, nil), + NewSeries("index1", Int64, []int64{1}, nil), ) require.NoError(t, err) expected, err := NewBowWithMetadata(NewMetadata([]string{"k1", "k2"}, []string{"v1", "v2"}), - NewSeries("index1", []int64{1}, nil), + NewSeries("index1", Int64, []int64{1}, nil), ) require.NoError(t, err) @@ -531,23 +531,23 @@ func TestBow_InnerJoin(t *testing.T) { t.Run("no common rows", func(t *testing.T) { b1, err := NewBow( - NewSeries("index1", []int64{1, 1, 2, 3, 4}, nil), - NewSeries("index2", []float64{1.1, 1.1, 2.2, 3.3, 4.4}, []bool{true, true, false, true, true}), - NewSeries("col1", []int64{1, 2, 3, 4, 5}, []bool{true, false, true, true, true}), + NewSeries("index1", Int64, []int64{1, 1, 2, 3, 4}, nil), + NewSeries("index2", Float64, []float64{1.1, 1.1, 2.2, 3.3, 4.4}, []bool{true, true, false, true, true}), + NewSeries("col1", Int64, []int64{1, 2, 3, 4, 5}, []bool{true, false, true, true, true}), ) require.NoError(t, err) b2, err := NewBow( - NewSeries("index1", []int64{10}, nil), - NewSeries("col2", []int64{10}, nil), + NewSeries("index1", Int64, []int64{10}, nil), + NewSeries("col2", Int64, []int64{10}, nil), ) require.NoError(t, err) expected, err := NewBow( - NewSeries("index1", []int64{}, nil), - NewSeries("index2", []float64{}, nil), - NewSeries("col1", []int64{}, []bool{}), - NewSeries("col2", []int64{}, nil), + NewSeries("index1", Int64, []int64{}, nil), + NewSeries("index2", Float64, []float64{}, nil), + NewSeries("col1", Int64, []int64{}, []bool{}), + NewSeries("col2", Int64, []int64{}, nil), ) require.NoError(t, err) @@ -557,14 +557,14 @@ func TestBow_InnerJoin(t *testing.T) { t.Run("incompatible types", func(t *testing.T) { b1, err := NewBow( - NewSeries("index1", []int64{1, 1, 2, 3, 4}, nil), - NewSeries("index2", []float64{1.1, 1.1, 2.2, 3.3, 4.4}, []bool{true, true, false, true, true}), - NewSeries("col1", []int64{1, 2, 3, 4, 5}, []bool{true, false, true, true, true}), + NewSeries("index1", Int64, []int64{1, 1, 2, 3, 4}, nil), + NewSeries("index2", Float64, []float64{1.1, 1.1, 2.2, 3.3, 4.4}, []bool{true, true, false, true, true}), + NewSeries("col1", Int64, []int64{1, 2, 3, 4, 5}, []bool{true, false, true, true, true}), ) require.NoError(t, err) b2, err := NewBow( - NewSeries("index1", []float64{1}, nil), + NewSeries("index1", Float64, []float64{1}, nil), ) require.NoError(t, err) @@ -580,22 +580,22 @@ func TestBow_InnerJoin(t *testing.T) { t.Run("no common columns", func(t *testing.T) { b1, err := NewBow( - NewSeries("index1", []int64{1, 1, 2, 3, 4}, nil), - NewSeries("index2", []float64{1.1, 1.1, 2.2, 3.3, 4.4}, []bool{true, true, false, true, true}), - NewSeries("col1", []int64{1, 2, 3, 4, 5}, []bool{true, false, true, true, true}), + NewSeries("index1", Int64, []int64{1, 1, 2, 3, 4}, nil), + NewSeries("index2", Float64, []float64{1.1, 1.1, 2.2, 3.3, 4.4}, []bool{true, true, false, true, true}), + NewSeries("col1", Int64, []int64{1, 2, 3, 4, 5}, []bool{true, false, true, true, true}), ) require.NoError(t, err) b2, err := NewBow( - NewSeries("index3", []float64{1.1}, nil), + NewSeries("index3", Float64, []float64{1.1}, nil), ) require.NoError(t, err) expected, err := NewBow( - NewSeries("index1", []int64{}, nil), - NewSeries("index2", []float64{}, nil), - NewSeries("col1", []int64{}, nil), - NewSeries("index3", []float64{}, []bool{}), + NewSeries("index1", Int64, []int64{}, nil), + NewSeries("index2", Float64, []float64{}, nil), + NewSeries("col1", Int64, []int64{}, nil), + NewSeries("index3", Float64, []float64{}, []bool{}), ) require.NoError(t, err) @@ -605,17 +605,17 @@ func TestBow_InnerJoin(t *testing.T) { t.Run("with metadata", func(t *testing.T) { b1, err := NewBowWithMetadata(NewMetadata([]string{"k1"}, []string{"v1"}), - NewSeries("index1", []int64{1}, nil), + NewSeries("index1", Int64, []int64{1}, nil), ) require.NoError(t, err) b2, err := NewBowWithMetadata(NewMetadata([]string{"k2"}, []string{"v2"}), - NewSeries("index1", []int64{1}, nil), + NewSeries("index1", Int64, []int64{1}, nil), ) require.NoError(t, err) expected, err := NewBowWithMetadata(NewMetadata([]string{"k1", "k2"}, []string{"v1", "v2"}), - NewSeries("index1", []int64{1}, nil), + NewSeries("index1", Int64, []int64{1}, nil), ) require.NoError(t, err) diff --git a/bowjson.go b/bowjson.go index 1ff1145..d679793 100644 --- a/bowjson.go +++ b/bowjson.go @@ -2,6 +2,7 @@ package bow import ( "encoding/json" + "fmt" ) type jsonField struct { @@ -57,11 +58,11 @@ func NewJSONBow(b Bow) JSONBow { func (b *bow) UnmarshalJSON(data []byte) error { jsonB := JSONBow{} if err := json.Unmarshal(data, &jsonB); err != nil { - return err + return fmt.Errorf("json.Unmarshal: %w", err) } if err := b.NewValuesFromJSON(jsonB); err != nil { - return err + return fmt.Errorf("bow.NewValuesFromJSON: %w", err) } return nil @@ -94,7 +95,13 @@ func (b *bow) NewValuesFromJSON(jsonB JSONBow) error { */ for fieldIndex, field := range jsonB.Schema.Fields { - if _, ok := mapArrowNameToBowTypes[field.Type]; ok { + ok := false + for _, arrowType := range mapBowToArrowDataTypes { + if arrowType.Name() == field.Type { + ok = true + } + } + if ok { continue } switch field.Type { @@ -111,7 +118,7 @@ func (b *bow) NewValuesFromJSON(jsonB JSONBow) error { if jsonB.RowBasedData == nil { for i, field := range jsonB.Schema.Fields { - typ := getBowTypeFromArrowName(field.Type) + typ := getBowTypeFromArrowFingerprint(field.Type) buf := NewBuffer(0, typ) series[i] = NewSeriesFromBuffer(field.Name, buf) } diff --git a/bowmetadata_test.go b/bowmetadata_test.go index 7955564..10042d0 100644 --- a/bowmetadata_test.go +++ b/bowmetadata_test.go @@ -9,7 +9,7 @@ import ( func TestBow_WithMetadata(t *testing.T) { t.Run("adding meta should not modify bow, but correctly change schema", func(t *testing.T) { metadata := NewMetadata([]string{"testKey"}, []string{"testValue"}) - b, _ := NewBow(NewSeries("test", []int64{1, 2}, nil)) + b, _ := NewBow(NewSeries("test", Int64, []int64{1, 2}, nil)) res := b.WithMetadata(metadata) assert.True(t, res.Metadata().Equal(metadata.Metadata), diff --git a/bowparquet_test.go b/bowparquet_test.go index d8bc1c2..0e02029 100644 --- a/bowparquet_test.go +++ b/bowparquet_test.go @@ -83,8 +83,8 @@ func TestParquet(t *testing.T) { t.Run("bow with context and col_types metadata", func(t *testing.T) { var series = make([]Series, 2) - series[0] = NewSeries("time", []int64{0}, []bool{true}) - series[1] = NewSeries(" va\"lue ", []float64{0.}, []bool{true}) + series[0] = NewSeries("time", Int64, []int64{0}, []bool{true}) + series[1] = NewSeries(" va\"lue ", Float64, []float64{0.}, []bool{true}) var keys, values []string type Unit struct { @@ -125,8 +125,8 @@ func TestParquet(t *testing.T) { t.Run("bow with wrong col_types metadata", func(t *testing.T) { var series = make([]Series, 2) - series[0] = NewSeries("time", []int64{0}, []bool{true}) - series[1] = NewSeries("value", []float64{0.}, []bool{true}) + series[0] = NewSeries("time", Int64, []int64{0}, []bool{true}) + series[1] = NewSeries("value", Float64, []float64{0.}, []bool{true}) var keys, values []string @@ -142,8 +142,8 @@ func TestParquet(t *testing.T) { func TestBowGetParquetMetaColTimeUnit(t *testing.T) { timeCol := "time" var series = make([]Series, 2) - series[0] = NewSeries(timeCol, []int64{0}, nil) - series[1] = NewSeries("value", []float64{0.}, nil) + series[0] = NewSeries(timeCol, Int64, []int64{0}, nil) + series[1] = NewSeries("value", Float64, []float64{0.}, nil) t.Run("time.Millisecond", func(t *testing.T) { b, err := NewBowWithMetadata( diff --git a/bowrecord.go b/bowrecord.go index 44e0a1b..510721c 100644 --- a/bowrecord.go +++ b/bowrecord.go @@ -8,6 +8,16 @@ import ( "github.com/apache/arrow/go/v8/arrow/array" ) +func NewBowFromRecord(record arrow.Record) (Bow, error) { + for _, f := range record.Schema().Fields() { + if getBowTypeFromArrowFingerprint(f.Type.Fingerprint()) == Unknown { + return nil, fmt.Errorf("unsupported type '%s'", f.Type) + } + } + + return &bow{Record: record}, nil +} + func newRecord(metadata Metadata, series ...Series) (arrow.Record, error) { var fields []arrow.Field var arrays []arrow.Array @@ -24,7 +34,7 @@ func newRecord(metadata Metadata, series ...Series) (arrow.Record, error) { if s.Name == "" { return nil, errors.New("empty Series name") } - if getBowTypeFromArrowType(s.Array.DataType()) == Unknown { + if getBowTypeFromArrowFingerprint(s.Array.DataType().Fingerprint()) == Unknown { return nil, fmt.Errorf("unsupported type '%s'", s.Array.DataType()) } if int64(s.Array.Len()) != nRows { @@ -33,7 +43,11 @@ func newRecord(metadata Metadata, series ...Series) (arrow.Record, error) { "bow.Series '%s' has a length of %d, which is different from the previous ones", s.Name, s.Array.Len()) } - fields = append(fields, arrow.Field{Name: s.Name, Type: s.Array.DataType()}) + fields = append(fields, arrow.Field{ + Name: s.Name, + Type: s.Array.DataType(), + Nullable: true, + }) arrays = append(arrays, s.Array) } diff --git a/bowseries.go b/bowseries.go index 8ddc177..45b43ed 100644 --- a/bowseries.go +++ b/bowseries.go @@ -4,9 +4,9 @@ import ( "encoding/json" "fmt" + "github.com/apache/arrow/go/arrow/bitutil" "github.com/apache/arrow/go/v8/arrow" "github.com/apache/arrow/go/v8/arrow/array" - "github.com/apache/arrow/go/v8/arrow/bitutil" "github.com/apache/arrow/go/v8/arrow/memory" ) @@ -17,106 +17,56 @@ type Series struct { Array arrow.Array } -// NewSeries returns a new Series from: -// - name: string -// - dataArray: slice of the data in any of the Bow supported types -// - validityArray: -// - If nil, the data will be non-nil -// - Can be of type []bool or []byte to represent nil values -func NewSeries(name string, dataArray interface{}, validityArray interface{}) Series { - switch v := dataArray.(type) { - case []int64: - return newInt64Series(name, v, buildNullBitmapBytes(len(v), validityArray)) - case []float64: - return newFloat64Series(name, v, buildNullBitmapBytes(len(v), validityArray)) - case []bool: - return newBooleanSeries(name, v, buildNullBitmapBytes(len(v), validityArray)) - case []string: - return newStringSeries(name, v, buildNullBitmapBytes(len(v), validityArray)) +func NewSeries(name string, typ Type, dataArray interface{}, validityArray interface{}) Series { + switch typ { + case Int64: + return newInt64Series(name, dataArray.([]int64), + buildNullBitmapBytes(len(dataArray.([]int64)), validityArray)) + case Float64: + return newFloat64Series(name, dataArray.([]float64), + buildNullBitmapBytes(len(dataArray.([]float64)), validityArray)) + case Boolean: + return newBooleanSeries(name, dataArray.([]bool), + buildNullBitmapBytes(len(dataArray.([]bool)), validityArray)) + case String: + return newStringSeries(name, dataArray.([]string), + buildNullBitmapBytes(len(dataArray.([]string)), validityArray)) default: - panic(fmt.Errorf("unsupported type %T", v)) + panic(fmt.Errorf("unsupported type '%s'", typ)) } } -// NewSeriesFromBuffer returns a new Series from a name and a Buffer. func NewSeriesFromBuffer(name string, buf Buffer) Series { - switch data := buf.Data.(type) { - case []int64: - return newInt64Series(name, data, buf.nullBitmapBytes) - case []float64: - return newFloat64Series(name, data, buf.nullBitmapBytes) - case []bool: - return newBooleanSeries(name, data, buf.nullBitmapBytes) - case []string: - return newStringSeries(name, data, buf.nullBitmapBytes) + switch buf.DataType { + case Int64: + return newInt64Series(name, buf.Data.([]int64), buf.nullBitmapBytes) + case Float64: + return newFloat64Series(name, buf.Data.([]float64), buf.nullBitmapBytes) + case Boolean: + return newBooleanSeries(name, buf.Data.([]bool), buf.nullBitmapBytes) + case String: + return newStringSeries(name, buf.Data.([]string), buf.nullBitmapBytes) default: - panic(fmt.Errorf("unsupported type '%T'", buf.Data)) - } -} - -func newInt64Series(name string, data []int64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewInt64Data( - array.NewData(mapBowToArrowTypes[Int64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), + panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } } -func newFloat64Series(name string, data []float64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewFloat64Data( - array.NewData(mapBowToArrowTypes[Float64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), - } -} - -func newBooleanSeries(name string, data []bool, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewBooleanBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} -} - -func newStringSeries(name string, data []string, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewStringBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} -} - -// NewSeriesFromInterfaces returns a new Series from: -// - name: string -// - typ: Bow Type -// - data: represented by a slice of interface{}, with eventually nil values -func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { +func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series { if typ == Unknown { var err error - if typ, err = getBowTypeFromInterfaces(data); err != nil { + if typ, err = getBowTypeFromInterfaces(cells); err != nil { panic(err) } } + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) switch typ { case Int64: builder := array.NewInt64Builder(mem) defer builder.Release() - builder.Resize(len(data)) - for i := 0; i < len(data); i++ { - v, ok := ToInt64(data[i]) + builder.Resize(len(cells)) + for i := 0; i < len(cells); i++ { + v, ok := ToInt64(cells[i]) if !ok { builder.AppendNull() continue @@ -127,9 +77,9 @@ func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { case Float64: builder := array.NewFloat64Builder(mem) defer builder.Release() - builder.Resize(len(data)) - for i := 0; i < len(data); i++ { - v, ok := ToFloat64(data[i]) + builder.Resize(len(cells)) + for i := 0; i < len(cells); i++ { + v, ok := ToFloat64(cells[i]) if !ok { builder.AppendNull() continue @@ -140,9 +90,9 @@ func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { case Boolean: builder := array.NewBooleanBuilder(mem) defer builder.Release() - builder.Resize(len(data)) - for i := 0; i < len(data); i++ { - v, ok := ToBoolean(data[i]) + builder.Resize(len(cells)) + for i := 0; i < len(cells); i++ { + v, ok := ToBoolean(cells[i]) if !ok { builder.AppendNull() continue @@ -153,9 +103,9 @@ func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { case String: builder := array.NewStringBuilder(mem) defer builder.Release() - builder.Resize(len(data)) - for i := 0; i < len(data); i++ { - v, ok := ToString(data[i]) + builder.Resize(len(cells)) + for i := 0; i < len(cells); i++ { + v, ok := ToString(cells[i]) if !ok { builder.AppendNull() continue @@ -164,10 +114,54 @@ func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { } return Series{Name: name, Array: builder.NewArray()} default: - panic(fmt.Errorf("unhandled type %s", typ)) + panic(fmt.Errorf("unsupported type '%s'", typ)) + } +} + +func newInt64Series(name string, data []int64, valid []byte) Series { + length := len(data) + return Series{ + Name: name, + Array: array.NewInt64Data( + array.NewData(mapBowToArrowDataTypes[Int64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(data)), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), + } +} + +func newFloat64Series(name string, data []float64, valid []byte) Series { + length := len(data) + return Series{ + Name: name, + Array: array.NewFloat64Data( + array.NewData(mapBowToArrowDataTypes[Float64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(data)), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), } } +func newBooleanSeries(name string, data []bool, valid []byte) Series { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewBooleanBuilder(mem) + defer builder.Release() + builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) + return Series{Name: name, Array: builder.NewArray()} +} + +func newStringSeries(name string, data []string, valid []byte) Series { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewStringBuilder(mem) + defer builder.Release() + builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) + return Series{Name: name, Array: builder.NewArray()} +} + func buildNullBitmapBool(dataLength int, validityArray interface{}) []bool { switch valid := validityArray.(type) { case nil: diff --git a/bowseries_test.go b/bowseries_test.go index 990b119..b569523 100644 --- a/bowseries_test.go +++ b/bowseries_test.go @@ -31,7 +31,7 @@ func BenchmarkNewSeries(b *testing.B) { b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) { for n := 0; n < b.N; n++ { - NewSeries("test", dataArray, validArray) + NewSeries("test", Int64, dataArray, validArray) } }) } diff --git a/bowsetters_test.go b/bowsetters_test.go index 17ea62c..16c78a9 100644 --- a/bowsetters_test.go +++ b/bowsetters_test.go @@ -9,12 +9,12 @@ import ( func TestBow_SetColName(t *testing.T) { b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("oldName", []float64{0.1, 0.2}, nil), + NewSeries("oldName", Float64, []float64{0.1, 0.2}, nil), ) require.NoError(t, err) expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("newName", []float64{0.1, 0.2}, nil), + NewSeries("newName", Float64, []float64{0.1, 0.2}, nil), ) require.NoError(t, err) @@ -37,14 +37,14 @@ func TestBow_SetColName(t *testing.T) { func TestBow_Apply(t *testing.T) { b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("unchanged", []float64{0.1, 0.2}, nil), - NewSeries("apply", []float64{0.1, 0.2}, nil), + NewSeries("unchanged", Float64, []float64{0.1, 0.2}, nil), + NewSeries("apply", Float64, []float64{0.1, 0.2}, nil), ) require.NoError(t, err) expect, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("unchanged", []float64{0.1, 0.2}, nil), - NewSeries("apply", []string{"0.100000", "0.200000"}, nil), + NewSeries("unchanged", Float64, []float64{0.1, 0.2}, nil), + NewSeries("apply", String, []string{"0.100000", "0.200000"}, nil), ) require.NoError(t, err) @@ -55,8 +55,8 @@ func TestBow_Apply(t *testing.T) { func TestBow_Filter(t *testing.T) { b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("string", []string{"0.1", "0.2"}, nil), - NewSeries("float", []float64{0.1, 0.2}, nil), + NewSeries("string", String, []string{"0.1", "0.2"}, nil), + NewSeries("float", Float64, []float64{0.1, 0.2}, nil), ) require.NoError(t, err) @@ -116,13 +116,13 @@ func TestBow_Filter(t *testing.T) { t.Run("match non concomitant", func(t *testing.T) { b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("string", []string{"0.1", "0.2", "0.3"}, nil), - NewSeries("float", []float64{0.1, 0.2, 0.3}, nil), + NewSeries("string", String, []string{"0.1", "0.2", "0.3"}, nil), + NewSeries("float", Float64, []float64{0.1, 0.2, 0.3}, nil), ) require.NoError(t, err) expect, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("string", []string{"0.1", "0.3"}, nil), - NewSeries("float", []float64{0.1, 0.3}, nil), + NewSeries("string", String, []string{"0.1", "0.3"}, nil), + NewSeries("float", Float64, []float64{0.1, 0.3}, nil), ) require.NoError(t, err) diff --git a/bowsort_test.go b/bowsort_test.go index ac4db4b..2294499 100644 --- a/bowsort_test.go +++ b/bowsort_test.go @@ -170,14 +170,14 @@ func TestBow_SortByCol(t *testing.T) { t.Run("with metadata", func(t *testing.T) { b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{1, 3, 2}, nil), - NewSeries("value", []float64{.1, .3, .2}, nil), + NewSeries("time", Int64, []int64{1, 3, 2}, nil), + NewSeries("value", Float64, []float64{.1, .3, .2}, nil), ) require.NoError(t, err) expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), - NewSeries("time", []int64{1, 2, 3}, nil), - NewSeries("value", []float64{.1, .2, .3}, nil), + NewSeries("time", Int64, []int64{1, 2, 3}, nil), + NewSeries("value", Float64, []float64{.1, .2, .3}, nil), ) require.NoError(t, err) diff --git a/bowtypes.go b/bowtypes.go index 60681fd..b9cda02 100644 --- a/bowtypes.go +++ b/bowtypes.go @@ -1,13 +1,15 @@ package bow import ( + "fmt" + "github.com/apache/arrow/go/v8/arrow" ) type Type int // How to add a Type: -// - Seek corresponding arrow.DataType and add it in `mapArrowToBowTypes` +// - Seek corresponding arrow.Type and add it in `mapArrowFingerprintToBowTypes` // - add a convert function with desired logic and add case in other conversion func // - add necessary case in buffer file // - complete GetValue bow method @@ -30,26 +32,12 @@ const ( ) var ( - mapArrowToBowTypes = map[arrow.DataType]Type{ - arrow.PrimitiveTypes.Float64: Float64, - arrow.PrimitiveTypes.Int64: Int64, - arrow.FixedWidthTypes.Boolean: Boolean, - arrow.BinaryTypes.String: String, + mapBowToArrowDataTypes = map[Type]arrow.DataType{ + Float64: arrow.PrimitiveTypes.Float64, + Int64: arrow.PrimitiveTypes.Int64, + Boolean: arrow.FixedWidthTypes.Boolean, + String: arrow.BinaryTypes.String, } - mapBowToArrowTypes = func() map[Type]arrow.DataType { - res := make(map[Type]arrow.DataType) - for arrowDataType, bowType := range mapArrowToBowTypes { - res[bowType] = arrowDataType - } - return res - }() - mapArrowNameToBowTypes = func() map[string]Type { - res := make(map[string]Type) - for arrowDataType, bowType := range mapArrowToBowTypes { - res[arrowDataType.Name()] = bowType - } - return res - }() allType = func() []Type { res := make([]Type, InputDependent-1) for typ := Type(1); typ < InputDependent; typ++ { @@ -59,13 +47,6 @@ var ( }() ) -// ArrowType returns the arrow.DataType from the Bow Type. -func (t Type) ArrowType() arrow.DataType { - return mapBowToArrowTypes[t] -} - -// Convert attempts to convert the `input` value to the Type t. -// Returns nil if it fails. func (t Type) Convert(input interface{}) interface{} { var output interface{} var ok bool @@ -87,33 +68,35 @@ func (t Type) Convert(input interface{}) interface{} { // IsSupported ensures that the Type t is currently supported by Bow and matches a convertible concrete type. func (t Type) IsSupported() bool { - _, ok := mapBowToArrowTypes[t] + _, ok := mapBowToArrowDataTypes[t] return ok } // String returns the string representation of the Type t. func (t Type) String() string { - at, ok := mapBowToArrowTypes[t] + at, ok := mapBowToArrowDataTypes[t] if !ok { return "undefined" } - return at.Name() + return fmt.Sprintf("%s", at) } -func getBowTypeFromArrowName(arrowName string) Type { - typ, ok := mapArrowNameToBowTypes[arrowName] - if !ok { - return Unknown +func getBowTypeFromArrowFingerprint(fingerprint string) Type { + for bowType, arrowType := range mapBowToArrowDataTypes { + if arrowType.Fingerprint() == fingerprint { + return bowType + } } - return typ + return Unknown } -func getBowTypeFromArrowType(arrowType arrow.DataType) Type { - typ, ok := mapArrowToBowTypes[arrowType] - if !ok { - return Unknown +func getBowTypeFromArrowName(name string) Type { + for bowType, arrowType := range mapBowToArrowDataTypes { + if arrowType.Name() == name { + return bowType + } } - return typ + return Unknown } // GetAllTypes returns all Bow types. diff --git a/go.mod b/go.mod index d113fcf..a8e8b23 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/metronlab/bow go 1.18 require ( - github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect + github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 github.com/apache/arrow/go/v8 v8.0.0-20220425143814-555b4d27192e github.com/apache/thrift v0.16.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect diff --git a/rolling/aggregation/XXXbenchmarks_test.go b/rolling/aggregation/XXXbenchmarks_test.go index c42c8e5..f31b4d0 100644 --- a/rolling/aggregation/XXXbenchmarks_test.go +++ b/rolling/aggregation/XXXbenchmarks_test.go @@ -99,14 +99,14 @@ func benchmarkBow(b *testing.B) { for i := int64(0); i < size; i++ { buf.Data.([]int64)[i] = i } - return bow.NewSeries("time", buf.Data, nil) + return bow.NewSeries("time", bow.Int64, buf.Data, nil) }(BenchSize) series[1] = func(size int64) bow.Series { buf := bow.NewBuffer(int(size), bow.Float64) for i := int64(0); i < size; i++ { buf.Data.([]float64)[i] = rand.Float64() } - return bow.NewSeries("value", buf.Data, nil) + return bow.NewSeries("value", bow.Float64, buf.Data, nil) }(BenchSize) b.Run("NewBow without validity bitmap", func(b *testing.B) { diff --git a/rolling/aggregation/arithmeticmean_test.go b/rolling/aggregation/arithmeticmean_test.go index 1ea10ef..0424118 100644 --- a/rolling/aggregation/arithmeticmean_test.go +++ b/rolling/aggregation/arithmeticmean_test.go @@ -14,8 +14,8 @@ func TestArithmeticMean(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []float64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b diff --git a/rolling/aggregation/core_test.go b/rolling/aggregation/core_test.go index 804806e..4ed684a 100644 --- a/rolling/aggregation/core_test.go +++ b/rolling/aggregation/core_test.go @@ -23,8 +23,8 @@ type testCase struct { var ( emptyBow, _ = bow.NewBow( - bow.NewSeries(timeCol, []int64{}, nil), - bow.NewSeries(valueCol, []float64{}, nil), + bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), + bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), ) nilBow, _ = bow.NewBowFromRowBasedInterfaces( []string{timeCol, valueCol}, diff --git a/rolling/aggregation/count_test.go b/rolling/aggregation/count_test.go index 049527c..975c6c1 100644 --- a/rolling/aggregation/count_test.go +++ b/rolling/aggregation/count_test.go @@ -14,8 +14,8 @@ func TestCount(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []int64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Int64, []int64{}, nil), ) assert.NoError(t, err) return b diff --git a/rolling/aggregation/firstlast_test.go b/rolling/aggregation/firstlast_test.go index 26b06f4..ea7fb09 100644 --- a/rolling/aggregation/firstlast_test.go +++ b/rolling/aggregation/firstlast_test.go @@ -14,8 +14,8 @@ func TestFirst(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []float64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b @@ -88,8 +88,8 @@ func TestLast(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []float64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b diff --git a/rolling/aggregation/integral_test.go b/rolling/aggregation/integral_test.go index 4a04641..1dd522c 100644 --- a/rolling/aggregation/integral_test.go +++ b/rolling/aggregation/integral_test.go @@ -15,8 +15,8 @@ func TestIntegralStep(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []float64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b @@ -98,8 +98,8 @@ func TestIntegralStep_scaled(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []float64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b @@ -134,8 +134,8 @@ func TestIntegralTrapezoid(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []float64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b diff --git a/rolling/aggregation/minmax_test.go b/rolling/aggregation/minmax_test.go index 58f4105..2106852 100644 --- a/rolling/aggregation/minmax_test.go +++ b/rolling/aggregation/minmax_test.go @@ -14,8 +14,8 @@ func TestMin(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []float64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b @@ -88,8 +88,8 @@ func TestMax(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []float64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b diff --git a/rolling/aggregation/mode_test.go b/rolling/aggregation/mode_test.go index a96c2fb..ef762a0 100644 --- a/rolling/aggregation/mode_test.go +++ b/rolling/aggregation/mode_test.go @@ -35,8 +35,8 @@ func TestMode(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries(timeCol, []int64{}, nil), - bow.NewSeries(valueCol, []float64{}, nil), + bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), + bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b diff --git a/rolling/aggregation/sum_test.go b/rolling/aggregation/sum_test.go index cf9187d..da397dc 100644 --- a/rolling/aggregation/sum_test.go +++ b/rolling/aggregation/sum_test.go @@ -14,8 +14,8 @@ func TestSum(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []float64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b diff --git a/rolling/aggregation/weightedmean_test.go b/rolling/aggregation/weightedmean_test.go index e82a939..c9fe42f 100644 --- a/rolling/aggregation/weightedmean_test.go +++ b/rolling/aggregation/weightedmean_test.go @@ -14,8 +14,8 @@ func TestWeightedAverageStep(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []float64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b @@ -103,8 +103,8 @@ func TestWeightedAverageLinear(t *testing.T) { testedBow: emptyBow, expectedBow: func() bow.Bow { b, err := bow.NewBow( - bow.NewSeries("time", []int64{}, nil), - bow.NewSeries("value", []float64{}, nil), + bow.NewSeries("time", bow.Int64, []int64{}, nil), + bow.NewSeries("value", bow.Float64, []float64{}, nil), ) assert.NoError(t, err) return b From e0986018c3d740974a64e802ef8f855262b40afd Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 28 Apr 2022 15:00:08 +0200 Subject: [PATCH 13/29] corrections --- bowseries.go | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/bowseries.go b/bowseries.go index 45b43ed..cf9e8ba 100644 --- a/bowseries.go +++ b/bowseries.go @@ -17,6 +17,13 @@ type Series struct { Array arrow.Array } +// NewSeries returns a new Series from: +// - name: string +// - typ: data type +// - dataArray: slice of the data in any of the Bow supported types +// - validityArray: +// - If nil, the data will be non-nil +// - Can be of type []bool or []byte to represent nil values func NewSeries(name string, typ Type, dataArray interface{}, validityArray interface{}) Series { switch typ { case Int64: @@ -36,6 +43,7 @@ func NewSeries(name string, typ Type, dataArray interface{}, validityArray inter } } +// NewSeriesFromBuffer returns a new Series from a name and a Buffer. func NewSeriesFromBuffer(name string, buf Buffer) Series { switch buf.DataType { case Int64: @@ -51,10 +59,14 @@ func NewSeriesFromBuffer(name string, buf Buffer) Series { } } -func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series { +// NewSeriesFromInterfaces returns a new Series from: +// - name: string +// - typ: Bow Type +// - data: represented by a slice of interface{}, with eventually nil values +func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { if typ == Unknown { var err error - if typ, err = getBowTypeFromInterfaces(cells); err != nil { + if typ, err = getBowTypeFromInterfaces(data); err != nil { panic(err) } } @@ -64,9 +76,9 @@ func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series case Int64: builder := array.NewInt64Builder(mem) defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToInt64(cells[i]) + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToInt64(data[i]) if !ok { builder.AppendNull() continue @@ -77,9 +89,9 @@ func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series case Float64: builder := array.NewFloat64Builder(mem) defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToFloat64(cells[i]) + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToFloat64(data[i]) if !ok { builder.AppendNull() continue @@ -90,9 +102,9 @@ func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series case Boolean: builder := array.NewBooleanBuilder(mem) defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToBoolean(cells[i]) + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToBoolean(data[i]) if !ok { builder.AppendNull() continue @@ -103,9 +115,9 @@ func NewSeriesFromInterfaces(name string, typ Type, cells []interface{}) Series case String: builder := array.NewStringBuilder(mem) defer builder.Release() - builder.Resize(len(cells)) - for i := 0; i < len(cells); i++ { - v, ok := ToString(cells[i]) + builder.Resize(len(data)) + for i := 0; i < len(data); i++ { + v, ok := ToString(data[i]) if !ok { builder.AppendNull() continue From 3b002c15c218573e32151acb1e9717d5fd22a75d Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 28 Apr 2022 15:03:48 +0200 Subject: [PATCH 14/29] corrections --- bowseries.go | 2 +- go.mod | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bowseries.go b/bowseries.go index cf9e8ba..099de85 100644 --- a/bowseries.go +++ b/bowseries.go @@ -4,9 +4,9 @@ import ( "encoding/json" "fmt" - "github.com/apache/arrow/go/arrow/bitutil" "github.com/apache/arrow/go/v8/arrow" "github.com/apache/arrow/go/v8/arrow/array" + "github.com/apache/arrow/go/v8/arrow/bitutil" "github.com/apache/arrow/go/v8/arrow/memory" ) diff --git a/go.mod b/go.mod index a8e8b23..d113fcf 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/metronlab/bow go 1.18 require ( - github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 + github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect github.com/apache/arrow/go/v8 v8.0.0-20220425143814-555b4d27192e github.com/apache/thrift v0.16.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect From 3026ef4035290652f1e3d04edc61c69c41668c82 Mon Sep 17 00:00:00 2001 From: agelloz Date: Mon, 2 May 2022 14:46:31 +0200 Subject: [PATCH 15/29] clean up --- bowbuffer.go | 22 +++++++++++----------- bowjson.go | 2 +- bowseries.go | 12 ++++++------ bowtypes.go | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/bowbuffer.go b/bowbuffer.go index b94c6d3..1c40ab7 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -133,32 +133,32 @@ func (b Buffer) Less(i, j int) bool { } func (b *bow) NewBufferFromCol(colIndex int) Buffer { + data := b.Column(colIndex).Data() res := Buffer{DataType: b.ColumnType(colIndex)} - arrayData := b.Column(colIndex).Data() switch b.ColumnType(colIndex) { case Int64: - arr := array.NewInt64Data(arrayData) + arr := array.NewInt64Data(data) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) res.Data = int64Values(arr) res.nullBitmapBytes = nullBitmapBytesCopy case Float64: - arr := array.NewFloat64Data(arrayData) + arr := array.NewFloat64Data(data) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) res.Data = float64Values(arr) res.nullBitmapBytes = nullBitmapBytesCopy case Boolean: - arr := array.NewBooleanData(arrayData) + arr := array.NewBooleanData(data) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) res.Data = booleanValues(arr) res.nullBitmapBytes = nullBitmapBytesCopy case String: - arr := array.NewStringData(arrayData) + arr := array.NewStringData(data) nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) copy(nullBitmapBytesCopy, nullBitmapBytes) @@ -175,29 +175,29 @@ func buildNullBitmapBytes(dataLength int, validityArray interface{}) []byte { var res []byte nullBitmapLength := bitutil.CeilByte(dataLength) / 8 - switch validityArray := validityArray.(type) { + switch valid := validityArray.(type) { case nil: res = make([]byte, nullBitmapLength) for i := 0; i < dataLength; i++ { bitutil.SetBit(res, i) } case []bool: - if len(validityArray) != dataLength { + if len(valid) != dataLength { panic(fmt.Errorf("dataArray and validityArray have different lengths")) } res = make([]byte, nullBitmapLength) for i := 0; i < dataLength; i++ { - if validityArray[i] { + if valid[i] { bitutil.SetBit(res, i) } } case []byte: - if len(validityArray) != nullBitmapLength { + if len(valid) != nullBitmapLength { panic(fmt.Errorf("dataArray and validityArray have different lengths")) } - return validityArray + return valid default: - panic(fmt.Errorf("unsupported type '%T'", validityArray)) + panic(fmt.Errorf("unsupported type '%T'", valid)) } return res diff --git a/bowjson.go b/bowjson.go index d679793..03c82a8 100644 --- a/bowjson.go +++ b/bowjson.go @@ -96,7 +96,7 @@ func (b *bow) NewValuesFromJSON(jsonB JSONBow) error { for fieldIndex, field := range jsonB.Schema.Fields { ok := false - for _, arrowType := range mapBowToArrowDataTypes { + for _, arrowType := range mapBowToArrowTypes { if arrowType.Name() == field.Type { ok = true } diff --git a/bowseries.go b/bowseries.go index 099de85..f7e532a 100644 --- a/bowseries.go +++ b/bowseries.go @@ -19,11 +19,11 @@ type Series struct { // NewSeries returns a new Series from: // - name: string -// - typ: data type -// - dataArray: slice of the data in any of the Bow supported types +// - typ: Bow data Type +// - dataArray: slice of the data // - validityArray: -// - If nil, the data will be non-nil -// - Can be of type []bool or []byte to represent nil values +// - if nil, the data will be non-nil +// - can be of type []bool or []byte to represent nil values func NewSeries(name string, typ Type, dataArray interface{}, validityArray interface{}) Series { switch typ { case Int64: @@ -135,7 +135,7 @@ func newInt64Series(name string, data []int64, valid []byte) Series { return Series{ Name: name, Array: array.NewInt64Data( - array.NewData(mapBowToArrowDataTypes[Int64], length, + array.NewData(mapBowToArrowTypes[Int64], length, []*memory.Buffer{ memory.NewBufferBytes(valid), memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(data)), @@ -149,7 +149,7 @@ func newFloat64Series(name string, data []float64, valid []byte) Series { return Series{ Name: name, Array: array.NewFloat64Data( - array.NewData(mapBowToArrowDataTypes[Float64], length, + array.NewData(mapBowToArrowTypes[Float64], length, []*memory.Buffer{ memory.NewBufferBytes(valid), memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(data)), diff --git a/bowtypes.go b/bowtypes.go index b9cda02..5275f86 100644 --- a/bowtypes.go +++ b/bowtypes.go @@ -9,7 +9,7 @@ import ( type Type int // How to add a Type: -// - Seek corresponding arrow.Type and add it in `mapArrowFingerprintToBowTypes` +// - Seek corresponding arrow.DataType and add it in `mapBowToArrowTypes` // - add a convert function with desired logic and add case in other conversion func // - add necessary case in buffer file // - complete GetValue bow method @@ -32,7 +32,7 @@ const ( ) var ( - mapBowToArrowDataTypes = map[Type]arrow.DataType{ + mapBowToArrowTypes = map[Type]arrow.DataType{ Float64: arrow.PrimitiveTypes.Float64, Int64: arrow.PrimitiveTypes.Int64, Boolean: arrow.FixedWidthTypes.Boolean, @@ -68,13 +68,13 @@ func (t Type) Convert(input interface{}) interface{} { // IsSupported ensures that the Type t is currently supported by Bow and matches a convertible concrete type. func (t Type) IsSupported() bool { - _, ok := mapBowToArrowDataTypes[t] + _, ok := mapBowToArrowTypes[t] return ok } // String returns the string representation of the Type t. func (t Type) String() string { - at, ok := mapBowToArrowDataTypes[t] + at, ok := mapBowToArrowTypes[t] if !ok { return "undefined" } @@ -82,7 +82,7 @@ func (t Type) String() string { } func getBowTypeFromArrowFingerprint(fingerprint string) Type { - for bowType, arrowType := range mapBowToArrowDataTypes { + for bowType, arrowType := range mapBowToArrowTypes { if arrowType.Fingerprint() == fingerprint { return bowType } @@ -91,7 +91,7 @@ func getBowTypeFromArrowFingerprint(fingerprint string) Type { } func getBowTypeFromArrowName(name string) Type { - for bowType, arrowType := range mapBowToArrowDataTypes { + for bowType, arrowType := range mapBowToArrowTypes { if arrowType.Name() == name { return bowType } From d6515a8354c230eedb5a68afce544d67585a96c0 Mon Sep 17 00:00:00 2001 From: agelloz Date: Mon, 2 May 2022 15:01:06 +0200 Subject: [PATCH 16/29] clean up --- bowconvert.go | 22 +++++++++++----------- bowjoin.gen.go | 2 +- bowjoin.gen.go.tmpl | 2 +- bowjoin.go | 2 +- bowparquet_test.go | 2 +- bowseries.gen_test.go | 2 +- bowstring.go | 39 +++++++++++++++++---------------------- 7 files changed, 33 insertions(+), 38 deletions(-) diff --git a/bowconvert.go b/bowconvert.go index c2797fd..b17c29d 100644 --- a/bowconvert.go +++ b/bowconvert.go @@ -9,31 +9,31 @@ import ( // ToInt64 attempts to convert `input` to int64. // Return also a false boolean if the conversion failed. func ToInt64(input interface{}) (output int64, ok bool) { - switch value := input.(type) { + switch input := input.(type) { case json.Number: - output, err := value.Int64() + output, err := input.Int64() return output, err == nil case int: - return int64(value), true + return int64(input), true case int8: - return int64(value), true + return int64(input), true case int16: - return int64(value), true + return int64(input), true case int32: - return int64(value), true + return int64(input), true case int64: - return value, true + return input, true case float32: - return int64(value), true + return int64(input), true case float64: - return int64(value), true + return int64(input), true case bool: - if value { + if input { return 1, true } return 0, true case string: - output, err := strconv.ParseInt(value, 10, 64) + output, err := strconv.ParseInt(input, 10, 64) return output, err == nil } return diff --git a/bowjoin.gen.go b/bowjoin.gen.go index 80d76ae..23a99e5 100644 --- a/bowjoin.gen.go +++ b/bowjoin.gen.go @@ -8,7 +8,7 @@ import ( "github.com/apache/arrow/go/v8/arrow/array" ) -func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, +func innerFillLeftBowCols(newSeries *[]Series, left *bow, newNumRows int, commonRows struct{ l, r []int }) { for colIndex := 0; colIndex < left.NumCols(); colIndex++ { diff --git a/bowjoin.gen.go.tmpl b/bowjoin.gen.go.tmpl index 8cefacd..95ddd0f 100644 --- a/bowjoin.gen.go.tmpl +++ b/bowjoin.gen.go.tmpl @@ -6,7 +6,7 @@ import ( "github.com/apache/arrow/go/v8/arrow/array" ) -func innerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows int, +func innerFillLeftBowCols(newSeries *[]Series, left *bow, newNumRows int, commonRows struct{ l, r []int }) { for colIndex := 0; colIndex < left.NumCols(); colIndex++ { diff --git a/bowjoin.go b/bowjoin.go index 0cdd581..af70a79 100644 --- a/bowjoin.go +++ b/bowjoin.go @@ -37,7 +37,7 @@ func (b *bow) InnerJoin(other Bow) Bow { newSeries := make([]Series, newNumCols) newNumRows := len(commonRows.l) - innerFillLeftBowCols(&newSeries, left, right, + innerFillLeftBowCols(&newSeries, left, newNumRows, commonRows) innerFillRightBowCols(&newSeries, left, right, newNumRows, newNumCols, commonCols, commonRows) diff --git a/bowparquet_test.go b/bowparquet_test.go index d8bc1c2..c75e681 100644 --- a/bowparquet_test.go +++ b/bowparquet_test.go @@ -33,7 +33,7 @@ func TestParquet(t *testing.T) { require.NoError(t, os.Remove(testOutputFileName+".parquet")) }) - t.Run("bow supported types with rows and nils", func(t *testing.T) { + t.Run("all supported types with rows and nil values", func(t *testing.T) { bBefore, err := NewBowFromRowBasedInterfaces( []string{"int", "float", "bool", "string"}, []Type{Int64, Float64, Boolean, String}, diff --git a/bowseries.gen_test.go b/bowseries.gen_test.go index b7af97b..f130914 100644 --- a/bowseries.gen_test.go +++ b/bowseries.gen_test.go @@ -7,7 +7,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestNewSeriesFromColBasedInterfaces(t *testing.T) { +func TestNewSeriesFromInterfaces(t *testing.T) { for _, typ := range allType { t.Run(typ.String(), func(t *testing.T) { testcase := []interface{}{typ.Convert(0), nil} diff --git a/bowstring.go b/bowstring.go index 70ad336..ccafb3c 100644 --- a/bowstring.go +++ b/bowstring.go @@ -17,39 +17,34 @@ func (b *bow) String() string { // tabs will be replaced by two spaces by formatter w.Init(writer, 0, 4, 2, ' ', 0) - // format any line (header or row) - formatRow := func(getCellStr func(colIndex int) string) { - var cells []string + var cells []string + for colIndex := 0; colIndex < b.NumCols(); colIndex++ { + cells = append(cells, fmt.Sprintf( + "%v", fmt.Sprintf( + "%s:%v", b.Schema().Field(colIndex).Name, b.ColumnType(colIndex)))) + } + _, err := fmt.Fprintln(w, strings.Join(cells, "\t")) + if err != nil { + panic(err) + } + + for row := range b.GetRowsChan() { + cells = []string{} for colIndex := 0; colIndex < b.NumCols(); colIndex++ { - cells = append(cells, fmt.Sprintf("%v", getCellStr(colIndex))) + cells = append(cells, fmt.Sprintf("%v", row[b.Schema().Field(colIndex).Name])) } - _, err := fmt.Fprintln(w, strings.Join(cells, "\t")) - if err != nil { + if _, err = fmt.Fprintln(w, strings.Join(cells, "\t")); err != nil { panic(err) } } - // Print col names on buffer - formatRow(func(colIndex int) string { - return fmt.Sprintf("%s:%v", b.Schema().Field(colIndex).Name, b.ColumnType(colIndex)) - }) - - // Print each row on buffer - rowChan := b.GetRowsChan() - for row := range rowChan { - formatRow(func(colIndex int) string { - return fmt.Sprintf("%v", row[b.Schema().Field(colIndex).Name]) - }) - } - if b.Metadata().Len() > 0 { - _, err := fmt.Fprintf(w, "metadata: %+v\n", b.Metadata()) - if err != nil { + if _, err = fmt.Fprintf(w, "metadata: %+v\n", b.Metadata()); err != nil { panic(err) } } - if err := w.Flush(); err != nil { + if err = w.Flush(); err != nil { panic(err) } From 379c90cba0ab003797a0fbbbb31cd57a07cf8f77 Mon Sep 17 00:00:00 2001 From: agelloz Date: Mon, 2 May 2022 17:17:42 +0200 Subject: [PATCH 17/29] changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5df524..d60f227 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ UNRELEASED [XXXX-XX-XX] - bump to arrow/go/v8 - remove useless count script - add gci linter + - improve documentation + - improve error handling + - improve code readability v0.18.0 [2022-02-16] ------------------- From d61360e0a7019a108a0782399b02106f2a6bb139 Mon Sep 17 00:00:00 2001 From: agelloz Date: Mon, 2 May 2022 17:23:32 +0200 Subject: [PATCH 18/29] CHANGELOG --- CHANGELOG.md | 1 + bowseries_test.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d60f227..1da3107 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ UNRELEASED [XXXX-XX-XX] - improve documentation - improve error handling - improve code readability + - remove code gen to prepare for Timestamp support v0.18.0 [2022-02-16] ------------------- diff --git a/bowseries_test.go b/bowseries_test.go index 990b119..801a856 100644 --- a/bowseries_test.go +++ b/bowseries_test.go @@ -8,7 +8,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestNewSeriesFromColBasedInterfaces(t *testing.T) { +func TestNewSeriesFromInterfaces(t *testing.T) { for _, typ := range allType { t.Run(typ.String(), func(t *testing.T) { testcase := []interface{}{typ.Convert(0), nil} From 107c0673dd3e15421486174253a0445f917ee568 Mon Sep 17 00:00:00 2001 From: agelloz Date: Mon, 2 May 2022 17:30:00 +0200 Subject: [PATCH 19/29] CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1da3107..6c8a221 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ UNRELEASED [XXXX-XX-XX] - improve error handling - improve code readability - remove code gen to prepare for Timestamp support + - add Bow data type in Buffer to decouple Go native types from Arrow types v0.18.0 [2022-02-16] ------------------- From 2b956c6153ed6bfb3e85d7cacdfd79044fad5dc3 Mon Sep 17 00:00:00 2001 From: agelloz Date: Mon, 2 May 2022 18:43:27 +0200 Subject: [PATCH 20/29] changelog --- bowfill.go | 23 +++++++++++++++++++++-- bowgetters_test.go | 14 +++++++++++--- bowjoin_test.go | 35 +++++++++++++++++++++++++++++++++-- bowparquet_test.go | 9 ++------- bowstring.go | 3 +-- 5 files changed, 68 insertions(+), 16 deletions(-) diff --git a/bowfill.go b/bowfill.go index a96652a..9f19128 100644 --- a/bowfill.go +++ b/bowfill.go @@ -5,11 +5,10 @@ import ( "math" "sync" + "github.com/apache/arrow/go/v8/arrow" "github.com/apache/arrow/go/v8/arrow/array" ) -// TODO: add support for timestamp types - // FillLinear fills the column toFillColIndex using the Linear interpolation method according // to the reference column refColIndex, which has to be sorted. // Fills only Int64 and Float64 types. @@ -29,6 +28,7 @@ func (b *bow) FillLinear(refColIndex, toFillColIndex int) (Bow, error) { switch b.ColumnType(refColIndex) { case Int64: case Float64: + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: default: return nil, fmt.Errorf("refColIndex '%d' is of type '%s'", refColIndex, b.ColumnType(refColIndex)) @@ -46,6 +46,7 @@ func (b *bow) FillLinear(refColIndex, toFillColIndex int) (Bow, error) { switch b.ColumnType(toFillColIndex) { case Int64: case Float64: + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: default: return nil, fmt.Errorf( "toFillColIndex '%d' is of unsupported type '%s'", @@ -83,6 +84,8 @@ func (b *bow) FillLinear(refColIndex, toFillColIndex int) (Bow, error) { buf.SetOrDropStrict(rowIndex, int64(prevToFill)) case Float64: buf.SetOrDropStrict(rowIndex, prevToFill) + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: + buf.SetOrDropStrict(rowIndex, arrow.Timestamp(prevToFill)) } } @@ -95,6 +98,8 @@ func (b *bow) FillLinear(refColIndex, toFillColIndex int) (Bow, error) { buf.SetOrDropStrict(rowIndex, int64(math.Round(tmp))) case Float64: buf.SetOrDropStrict(rowIndex, tmp) + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: + buf.SetOrDropStrict(rowIndex, arrow.Timestamp(math.Round(tmp))) } } @@ -118,6 +123,7 @@ func (b *bow) FillMean(colIndices ...int) (Bow, error) { switch b.ColumnType(colIndex) { case Int64: case Float64: + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: default: return nil, fmt.Errorf( "column '%s' is of unsupported type '%s'", @@ -151,6 +157,8 @@ func (b *bow) FillMean(colIndices ...int) (Bow, error) { buf.SetOrDropStrict(rowIndex, int64(math.Round((prevVal+nextVal)/2))) case Float64: buf.SetOrDropStrict(rowIndex, (prevVal+nextVal)/2) + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: + buf.SetOrDropStrict(rowIndex, arrow.Timestamp(math.Round((prevVal+nextVal)/2))) } } } @@ -241,6 +249,17 @@ func fill(method string, b *bow, colIndices ...int) (Bow, error) { buf.SetOrDropStrict(rowIndex, arr.Value(fillRowIndex)) } } + case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: + arr := array.NewTimestampData(data) + for rowIndex := 0; rowIndex < b.NumRows(); rowIndex++ { + if buf.IsValid(rowIndex) { + continue + } + fillRowIndex := getFillRowIndex(b, method, colIndex, rowIndex) + if fillRowIndex > -1 { + buf.SetOrDropStrict(rowIndex, arr.Value(fillRowIndex)) + } + } default: filledSeries[colIndex] = b.NewSeriesFromCol(colIndex) } diff --git a/bowgetters_test.go b/bowgetters_test.go index 156cc50..0f3d85c 100644 --- a/bowgetters_test.go +++ b/bowgetters_test.go @@ -3,6 +3,7 @@ package bow import ( "testing" + "github.com/apache/arrow/go/v8/arrow" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -32,12 +33,13 @@ func TestBow_GetValue(t *testing.T) { } func TestBow_Distinct(t *testing.T) { - colNames := []string{"time", "value", "meta"} - colTypes := []Type{Int64, Float64, String} + colNames := []string{"time", "value", "meta", "timestamp"} + colTypes := []Type{Int64, Float64, String, TimestampMilli} colData := [][]interface{}{ {1, 1, 2, nil, 3}, {1.1, 1.1, 2.2, nil, 3.3}, {"", "test", "test", nil, "3.3"}, + {1, 1, 2, nil, 3}, } b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colData) @@ -67,5 +69,11 @@ func TestBow_Distinct(t *testing.T) { ExpectEqual(t, expect, res) }) - // TODO: add tests for timestamp types + t.Run(TimestampMilli.String(), func(t *testing.T) { + res := b.Distinct(3) + expect, err := NewBow(NewSeries("timestamp", TimestampMilli, []arrow.Timestamp{1, 2, 3}, nil)) + require.NoError(t, err) + + ExpectEqual(t, expect, res) + }) } diff --git a/bowjoin_test.go b/bowjoin_test.go index 7277c30..f121177 100644 --- a/bowjoin_test.go +++ b/bowjoin_test.go @@ -8,8 +8,6 @@ import ( "github.com/stretchr/testify/require" ) -// TODO: add test cases for timestamp types - func TestBow_OuterJoin(t *testing.T) { t.Run("two empty bows", func(t *testing.T) { b1 := NewBowEmpty() @@ -168,6 +166,39 @@ func TestBow_OuterJoin(t *testing.T) { assert.EqualValues(t, expected.String(), result.String()) }) + t.Run("timestamps", func(t *testing.T) { + b1, err := NewBowFromRowBasedInterfaces([]string{"milli", "micro1"}, + []Type{TimestampMilli, TimestampMicro}, [][]interface{}{ + {1000, 0}, + {1100, 1}, + {1200, 2}, + {1300, 3}, + }) + require.NoError(t, err) + + b2, err := NewBowFromRowBasedInterfaces([]string{"milli", "micro2"}, + []Type{TimestampMilli, TimestampMicro}, [][]interface{}{ + {1100, 0}, + {1200, 1}, + {1300, 2}, + {1400, 3}, + }) + require.NoError(t, err) + + expected, err := NewBowFromRowBasedInterfaces([]string{"milli", "micro1", "micro2"}, + []Type{TimestampMilli, TimestampMicro, TimestampMicro}, [][]interface{}{ + {1000, 0, nil}, + {1100, 1, 0}, + {1200, 2, 1}, + {1300, 3, 2}, + {1400, nil, 3}, + }) + require.NoError(t, err) + + result := b1.OuterJoin(b2) + assert.EqualValues(t, expected.String(), result.String()) + }) + t.Run("with one common column", func(t *testing.T) { b1, err := NewBowFromRowBasedInterfaces([]string{"a", "b", "c"}, []Type{Int64, Int64, Int64}, [][]interface{}{ diff --git a/bowparquet_test.go b/bowparquet_test.go index d732287..6f4a345 100644 --- a/bowparquet_test.go +++ b/bowparquet_test.go @@ -1,7 +1,6 @@ package bow import ( - "fmt" "os" "testing" @@ -56,15 +55,11 @@ func TestParquet(t *testing.T) { }) require.NoError(t, err) - fmt.Printf("bBefore\n%s\n", bBefore) + assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_withrows", false)) - assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_withrows", true)) - - bAfter, err := NewBowFromParquet(testOutputFileName+"_withrows.parquet", true) + bAfter, err := NewBowFromParquet(testOutputFileName+"_withrows.parquet", false) assert.NoError(t, err) - fmt.Printf("bAfter\n%s\n", bAfter) - assert.Equal(t, bBefore.String(), bAfter.String()) require.NoError(t, os.Remove(testOutputFileName+"_withrows.parquet")) diff --git a/bowstring.go b/bowstring.go index 6c050d7..8b4d12c 100644 --- a/bowstring.go +++ b/bowstring.go @@ -46,7 +46,7 @@ func (b *bow) String() string { case TimestampNano: cells = append(cells, ti.ToTime(arrow.Nanosecond).Format(time.RFC3339Nano)) default: - panic("") + panic(fmt.Sprintf("unsupported type '%s'", b.ColumnType(colIndex))) } } else { cells = append(cells, fmt.Sprintf("%v", row[b.Schema().Field(colIndex).Name])) @@ -68,5 +68,4 @@ func (b *bow) String() string { } return writer.String() - } From c3a18d5fb224a2a649db8c965db1b72c3b93df6a Mon Sep 17 00:00:00 2001 From: agelloz Date: Wed, 4 May 2022 17:16:42 +0200 Subject: [PATCH 21/29] simplifications --- bowbuffer.go | 12 +++++++++++- bowconvert.go | 31 +++---------------------------- bowseries.go | 29 ++++++++++++++++++++++------- bowtypes.go | 19 ++++++++++--------- 4 files changed, 46 insertions(+), 45 deletions(-) diff --git a/bowbuffer.go b/bowbuffer.go index 67f7ced..834eaa0 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -42,6 +42,7 @@ func NewBuffer(size int, typ Type) Buffer { return buf } +// Len returns the size of the underlying slice of data in the Buffer. func (b Buffer) Len() int { switch b.DataType { case Int64: @@ -59,6 +60,8 @@ func (b Buffer) Len() int { } } +// SetOrDrop sets the Buffer data at index `i` by attempting to convert `value` to its DataType. +// Sets the value to nil if the conversion failed or if `value` is nil. func (b *Buffer) SetOrDrop(i int, value interface{}) { var valid bool switch b.DataType { @@ -83,6 +86,8 @@ func (b *Buffer) SetOrDrop(i int, value interface{}) { } } +// SetOrDropStrict sets the Buffer data at index `i` by attempting a type assertion of `value` to its DataType. +// Sets the value to nil if the assertion failed or if `value` is nil. func (b *Buffer) SetOrDropStrict(i int, value interface{}) { var valid bool switch b.DataType { @@ -95,7 +100,12 @@ func (b *Buffer) SetOrDropStrict(i int, value interface{}) { case String: b.Data.([]string)[i], valid = value.(string) case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: - b.Data.([]arrow.Timestamp)[i], valid = value.(arrow.Timestamp) + switch value := value.(type) { + case arrow.Timestamp: + b.Data.([]arrow.Timestamp)[i] = value + case int64: + b.Data.([]int64)[i] = value + } default: panic(fmt.Errorf("unsupported type '%s'", b.DataType)) } diff --git a/bowconvert.go b/bowconvert.go index 2726ed7..5fc751a 100644 --- a/bowconvert.go +++ b/bowconvert.go @@ -144,35 +144,10 @@ func ToString(input interface{}) (output string, ok bool) { return } -// ToTimestampSec returns an arrow.Timestamp value and a bool whether the conversion was successful or not. +// ToTimestamp returns an arrow.Timestamp value and a bool whether the conversion was successful or not. // String values are first interpreted with strconv.ParseInt. -// If it fails, the values are parsed with arrow.TimestampFromString with the arrow.Second time unit. -func ToTimestampSec(input interface{}) (output arrow.Timestamp, ok bool) { - return toTimestamp(input, arrow.Second) -} - -// ToTimestampMilli returns an arrow.Timestamp value and a bool whether the conversion was successful or not. -// String values are first interpreted with strconv.ParseInt. -// If it fails, the values are parsed with arrow.TimestampFromString with the arrow.Millisecond time unit. -func ToTimestampMilli(input interface{}) (output arrow.Timestamp, ok bool) { - return toTimestamp(input, arrow.Millisecond) -} - -// ToTimestampMicro returns an arrow.Timestamp value and a bool whether the conversion was successful or not. -// String values are first interpreted with strconv.ParseInt. -// If it fails, the values are parsed with arrow.TimestampFromString with the arrow.Microsecond time unit. -func ToTimestampMicro(input interface{}) (output arrow.Timestamp, ok bool) { - return toTimestamp(input, arrow.Microsecond) -} - -// ToTimestampNano returns an arrow.Timestamp value and a bool whether the conversion was successful or not. -// String values are first interpreted with strconv.ParseInt. -// If it fails, the values are parsed with arrow.TimestampFromString with the arrow.Nanosecond time unit. -func ToTimestampNano(input interface{}) (output arrow.Timestamp, ok bool) { - return toTimestamp(input, arrow.Nanosecond) -} - -func toTimestamp(input interface{}, timeUnit arrow.TimeUnit) (output arrow.Timestamp, ok bool) { +// If it fails, the values are parsed with arrow.TimestampFromString with the `timeUnit` time unit. +func ToTimestamp(input interface{}, timeUnit arrow.TimeUnit) (output arrow.Timestamp, ok bool) { switch input := input.(type) { case json.Number: output, err := input.Int64() diff --git a/bowseries.go b/bowseries.go index bcfa7f9..6345def 100644 --- a/bowseries.go +++ b/bowseries.go @@ -20,7 +20,7 @@ type Series struct { // NewSeries returns a new Series from: // - name: string // - typ: Bow data Type -// - dataArray: slice of the data +// - dataArray: slice of the data (for Timestamp types, data can be of type []int64 or []arrow.Timestamp) // - validityArray: // - if nil, the data will be non-nil // - can be of type []bool or []byte to represent nil values @@ -39,8 +39,14 @@ func NewSeries(name string, typ Type, dataArray interface{}, validityArray inter return newStringSeries(name, dataArray.([]string), buildNullBitmapBytes(len(dataArray.([]string)), validityArray)) case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: - return newTimestampSeries(name, typ, dataArray.([]arrow.Timestamp), - buildNullBitmapBytes(len(dataArray.([]arrow.Timestamp)), validityArray)) + switch data := dataArray.(type) { + case []arrow.Timestamp: + return newTimestampSeries(name, typ, data, buildNullBitmapBytes(len(data), validityArray)) + case []int64: + return newTimestampSeries(name, typ, data, buildNullBitmapBytes(len(data), validityArray)) + default: + panic(fmt.Errorf("unsupported type '%T' for Timestamp dataArray", dataArray)) + } default: panic(fmt.Errorf("unsupported type '%s'", typ)) } @@ -58,7 +64,7 @@ func NewSeriesFromBuffer(name string, buf Buffer) Series { case String: return newStringSeries(name, buf.Data.([]string), buf.nullBitmapBytes) case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: - return newTimestampSeries(name, buf.DataType, buf.Data.([]arrow.Timestamp), buf.nullBitmapBytes) + return newTimestampSeries(name, buf.DataType, buf.Data, buf.nullBitmapBytes) default: panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } @@ -135,7 +141,7 @@ func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { defer builder.Release() builder.Resize(len(data)) for i := 0; i < len(data); i++ { - v, ok := mapBowTypeToConvertFunc[typ](data[i]) + v, ok := ToTimestamp(data[i], mapBowTypeToTimeUnit[typ]) if !ok { builder.AppendNull() continue @@ -192,11 +198,20 @@ func newStringSeries(name string, data []string, valid []byte) Series { return Series{Name: name, Array: builder.NewArray()} } -func newTimestampSeries(name string, typ Type, data []arrow.Timestamp, valid []byte) Series { +func newTimestampSeries(name string, typ Type, data interface{}, valid []byte) Series { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) builder := array.NewTimestampBuilder(mem, mapBowToArrowTypes[typ].(*arrow.TimestampType)) defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) + switch data := data.(type) { + case []arrow.Timestamp: + builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) + case []int64: + tsData := make([]arrow.Timestamp, len(data)) + for i, intVal := range data { + tsData[i] = arrow.Timestamp(intVal) + } + builder.AppendValues(tsData, buildNullBitmapBool(len(tsData), valid)) + } return Series{Name: name, Array: builder.NewArray()} } diff --git a/bowtypes.go b/bowtypes.go index 8bba80f..9ac5c17 100644 --- a/bowtypes.go +++ b/bowtypes.go @@ -6,6 +6,7 @@ import ( "github.com/apache/arrow/go/v8/arrow" ) +// Type is a Bow logical type. type Type int // How to add a Type: @@ -46,11 +47,11 @@ var ( TimestampMicro: arrow.FixedWidthTypes.Timestamp_us, TimestampNano: arrow.FixedWidthTypes.Timestamp_ns, } - mapBowTypeToConvertFunc = map[Type]func(i interface{}) (arrow.Timestamp, bool){ - TimestampSec: ToTimestampSec, - TimestampMilli: ToTimestampMilli, - TimestampMicro: ToTimestampMicro, - TimestampNano: ToTimestampNano, + mapBowTypeToTimeUnit = map[Type]arrow.TimeUnit{ + TimestampSec: arrow.Second, + TimestampMilli: arrow.Millisecond, + TimestampMicro: arrow.Microsecond, + TimestampNano: arrow.Nanosecond, } allType = func() []Type { res := make([]Type, InputDependent-1) @@ -74,13 +75,13 @@ func (t Type) Convert(input interface{}) interface{} { case String: output, ok = ToString(input) case TimestampSec: - output, ok = ToTimestampSec(input) + output, ok = ToTimestamp(input, arrow.Second) case TimestampMilli: - output, ok = ToTimestampMilli(input) + output, ok = ToTimestamp(input, arrow.Millisecond) case TimestampMicro: - output, ok = ToTimestampMicro(input) + output, ok = ToTimestamp(input, arrow.Microsecond) case TimestampNano: - output, ok = ToTimestampNano(input) + output, ok = ToTimestamp(input, arrow.Nanosecond) } if ok { return output From b4dc811fd9e353ed45428490d12c6f81909aef0c Mon Sep 17 00:00:00 2001 From: agelloz Date: Wed, 4 May 2022 18:51:24 +0200 Subject: [PATCH 22/29] simplifications --- bowbuffer.go | 35 +------- bowconvert_test.go | 56 ++++++------ bowseries.go | 220 +++++++++++++++++++++++++++------------------ 3 files changed, 165 insertions(+), 146 deletions(-) diff --git a/bowbuffer.go b/bowbuffer.go index 834eaa0..7f2fb36 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -100,11 +100,14 @@ func (b *Buffer) SetOrDropStrict(i int, value interface{}) { case String: b.Data.([]string)[i], valid = value.(string) case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: + valid = true switch value := value.(type) { case arrow.Timestamp: b.Data.([]arrow.Timestamp)[i] = value case int64: b.Data.([]int64)[i] = value + default: + valid = false } default: panic(fmt.Errorf("unsupported type '%s'", b.DataType)) @@ -201,38 +204,6 @@ func (b *bow) NewBufferFromCol(colIndex int) Buffer { return res } -func buildNullBitmapBytes(dataLength int, validityArray interface{}) []byte { - var res []byte - nullBitmapLength := bitutil.CeilByte(dataLength) / 8 - - switch valid := validityArray.(type) { - case nil: - res = make([]byte, nullBitmapLength) - for i := 0; i < dataLength; i++ { - bitutil.SetBit(res, i) - } - case []bool: - if len(valid) != dataLength { - panic(fmt.Errorf("dataArray and validityArray have different lengths")) - } - res = make([]byte, nullBitmapLength) - for i := 0; i < dataLength; i++ { - if valid[i] { - bitutil.SetBit(res, i) - } - } - case []byte: - if len(valid) != nullBitmapLength { - panic(fmt.Errorf("dataArray and validityArray have different lengths")) - } - return valid - default: - panic(fmt.Errorf("unsupported type '%T'", valid)) - } - - return res -} - // NewBufferFromInterfaces returns a new typed Buffer with the data represented as a slice of interface{}, with eventual nil values. func NewBufferFromInterfaces(typ Type, data []interface{}) (Buffer, error) { buf := NewBuffer(len(data), typ) diff --git a/bowconvert_test.go b/bowconvert_test.go index a2ea94a..a41e24f 100644 --- a/bowconvert_test.go +++ b/bowconvert_test.go @@ -129,23 +129,23 @@ func TestToString(t *testing.T) { func TestToTimestamp(t *testing.T) { t.Run("Sec", func(t *testing.T) { - v, ok := ToTimestampSec(true) + v, ok := ToTimestamp(true, arrow.Second) require.True(t, ok) assert.Equal(t, arrow.Timestamp(1), v) - v, ok = ToTimestampSec(false) + v, ok = ToTimestamp(false, arrow.Second) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampSec(0.) + v, ok = ToTimestamp(0., arrow.Second) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampSec(0) + v, ok = ToTimestamp(0, arrow.Second) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampSec("0") + v, ok = ToTimestamp("0", arrow.Second) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) assert.Equal(t, "1970-01-01T00:00:00Z", @@ -155,35 +155,35 @@ func TestToTimestamp(t *testing.T) { ti, err := time.Parse(time.RFC3339, formattedTimeSec) require.NoError(t, err) - v, ok = ToTimestampSec(ti.Unix()) + v, ok = ToTimestamp(ti.Unix(), arrow.Second) require.True(t, ok) assert.Equal(t, "2022-04-27T00:00:00Z", v.ToTime(arrow.Second).Format(time.RFC3339Nano)) - v, ok = ToTimestampSec(formattedTimeSec) + v, ok = ToTimestamp(formattedTimeSec, arrow.Second) require.True(t, ok) assert.Equal(t, "2022-04-27T00:00:00Z", v.ToTime(arrow.Second).Format(time.RFC3339Nano)) }) t.Run("Milli", func(t *testing.T) { - v, ok := ToTimestampMilli(true) + v, ok := ToTimestamp(true, arrow.Millisecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(1), v) - v, ok = ToTimestampMilli(false) + v, ok = ToTimestamp(false, arrow.Millisecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampMilli(0.) + v, ok = ToTimestamp(0., arrow.Millisecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampMilli(0) + v, ok = ToTimestamp(0, arrow.Millisecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampMilli("0") + v, ok = ToTimestamp("0", arrow.Millisecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) assert.Equal(t, "1970-01-01T00:00:00Z", @@ -193,35 +193,35 @@ func TestToTimestamp(t *testing.T) { ti, err := time.Parse(time.RFC3339, formattedTimeMilli) require.NoError(t, err) - v, ok = ToTimestampMilli(ti.UnixMilli()) + v, ok = ToTimestamp(ti.UnixMilli(), arrow.Millisecond) require.True(t, ok) assert.Equal(t, "2022-04-27T00:00:00.123Z", v.ToTime(arrow.Millisecond).Format(time.RFC3339Nano)) - v, ok = ToTimestampMilli(formattedTimeMilli) + v, ok = ToTimestamp(formattedTimeMilli, arrow.Millisecond) require.True(t, ok) assert.Equal(t, "2022-04-27T00:00:00.123Z", v.ToTime(arrow.Millisecond).Format(time.RFC3339Nano)) }) t.Run("Micro", func(t *testing.T) { - v, ok := ToTimestampMicro(true) + v, ok := ToTimestamp(true, arrow.Microsecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(1), v) - v, ok = ToTimestampMicro(false) + v, ok = ToTimestamp(false, arrow.Microsecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampMicro(0.) + v, ok = ToTimestamp(0., arrow.Microsecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampMicro(0) + v, ok = ToTimestamp(0, arrow.Microsecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampMicro("0") + v, ok = ToTimestamp("0", arrow.Microsecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) assert.Equal(t, "1970-01-01T00:00:00Z", @@ -231,35 +231,35 @@ func TestToTimestamp(t *testing.T) { ti, err := time.Parse(time.RFC3339, formattedTimeMicro) require.NoError(t, err) - v, ok = ToTimestampMicro(ti.UnixMicro()) + v, ok = ToTimestamp(ti.UnixMicro(), arrow.Microsecond) require.True(t, ok) assert.Equal(t, "2022-04-27T00:00:00.123456Z", v.ToTime(arrow.Microsecond).Format(time.RFC3339Nano)) - v, ok = ToTimestampMicro(formattedTimeMicro) + v, ok = ToTimestamp(formattedTimeMicro, arrow.Microsecond) require.True(t, ok) assert.Equal(t, "2022-04-27T00:00:00.123456Z", v.ToTime(arrow.Microsecond).Format(time.RFC3339Nano)) }) t.Run("Nano", func(t *testing.T) { - v, ok := ToTimestampNano(true) + v, ok := ToTimestamp(true, arrow.Nanosecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(1), v) - v, ok = ToTimestampNano(false) + v, ok = ToTimestamp(false, arrow.Nanosecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampNano(0.) + v, ok = ToTimestamp(0., arrow.Nanosecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampNano(0) + v, ok = ToTimestamp(0, arrow.Nanosecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) - v, ok = ToTimestampNano("0") + v, ok = ToTimestamp("0", arrow.Nanosecond) require.True(t, ok) assert.Equal(t, arrow.Timestamp(0), v) assert.Equal(t, "1970-01-01T00:00:00Z", @@ -269,12 +269,12 @@ func TestToTimestamp(t *testing.T) { ti, err := time.Parse(time.RFC3339, formattedTimeNano) require.NoError(t, err) - v, ok = ToTimestampNano(ti.UnixNano()) + v, ok = ToTimestamp(ti.UnixNano(), arrow.Nanosecond) require.True(t, ok) assert.Equal(t, "2022-04-27T00:00:00.123456789Z", v.ToTime(arrow.Nanosecond).Format(time.RFC3339Nano)) - v, ok = ToTimestampNano(formattedTimeNano) + v, ok = ToTimestamp(formattedTimeNano, arrow.Nanosecond) require.True(t, ok) assert.Equal(t, "2022-04-27T00:00:00.123456789Z", v.ToTime(arrow.Nanosecond).Format(time.RFC3339Nano)) diff --git a/bowseries.go b/bowseries.go index 6345def..46950e6 100644 --- a/bowseries.go +++ b/bowseries.go @@ -27,26 +27,60 @@ type Series struct { func NewSeries(name string, typ Type, dataArray interface{}, validityArray interface{}) Series { switch typ { case Int64: - return newInt64Series(name, dataArray.([]int64), - buildNullBitmapBytes(len(dataArray.([]int64)), validityArray)) + length := len(dataArray.([]int64)) + valid := buildNullBitmapBytes(length, validityArray) + return Series{ + Name: name, + Array: array.NewInt64Data( + array.NewData(mapBowToArrowTypes[Int64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(dataArray.([]int64))), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), + } case Float64: - return newFloat64Series(name, dataArray.([]float64), - buildNullBitmapBytes(len(dataArray.([]float64)), validityArray)) + length := len(dataArray.([]float64)) + valid := buildNullBitmapBytes(length, validityArray) + return Series{ + Name: name, + Array: array.NewFloat64Data( + array.NewData(mapBowToArrowTypes[Float64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(dataArray.([]float64))), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), + } case Boolean: - return newBooleanSeries(name, dataArray.([]bool), - buildNullBitmapBytes(len(dataArray.([]bool)), validityArray)) + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewBooleanBuilder(mem) + defer builder.Release() + builder.AppendValues(dataArray.([]bool), buildNullBitmapBool(len(dataArray.([]bool)), validityArray)) + return Series{Name: name, Array: builder.NewArray()} case String: - return newStringSeries(name, dataArray.([]string), - buildNullBitmapBytes(len(dataArray.([]string)), validityArray)) + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewStringBuilder(mem) + defer builder.Release() + builder.AppendValues(dataArray.([]string), buildNullBitmapBool(len(dataArray.([]string)), validityArray)) + return Series{Name: name, Array: builder.NewArray()} case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewTimestampBuilder(mem, mapBowToArrowTypes[typ].(*arrow.TimestampType)) + defer builder.Release() switch data := dataArray.(type) { case []arrow.Timestamp: - return newTimestampSeries(name, typ, data, buildNullBitmapBytes(len(data), validityArray)) + builder.AppendValues(data, buildNullBitmapBool(len(data), validityArray)) case []int64: - return newTimestampSeries(name, typ, data, buildNullBitmapBytes(len(data), validityArray)) + tsData := make([]arrow.Timestamp, len(data)) + for i, intVal := range data { + tsData[i] = arrow.Timestamp(intVal) + } + builder.AppendValues(tsData, buildNullBitmapBool(len(tsData), validityArray)) default: panic(fmt.Errorf("unsupported type '%T' for Timestamp dataArray", dataArray)) } + return Series{Name: name, Array: builder.NewArray()} default: panic(fmt.Errorf("unsupported type '%s'", typ)) } @@ -56,15 +90,58 @@ func NewSeries(name string, typ Type, dataArray interface{}, validityArray inter func NewSeriesFromBuffer(name string, buf Buffer) Series { switch buf.DataType { case Int64: - return newInt64Series(name, buf.Data.([]int64), buf.nullBitmapBytes) + length := len(buf.Data.([]int64)) + return Series{ + Name: name, + Array: array.NewInt64Data( + array.NewData(mapBowToArrowTypes[Int64], length, + []*memory.Buffer{ + memory.NewBufferBytes(buf.nullBitmapBytes), + memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(buf.Data.([]int64))), + }, nil, length-bitutil.CountSetBits(buf.nullBitmapBytes, 0, length), 0), + ), + } case Float64: - return newFloat64Series(name, buf.Data.([]float64), buf.nullBitmapBytes) + length := len(buf.Data.([]float64)) + return Series{ + Name: name, + Array: array.NewFloat64Data( + array.NewData(mapBowToArrowTypes[Float64], length, + []*memory.Buffer{ + memory.NewBufferBytes(buf.nullBitmapBytes), + memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(buf.Data.([]float64))), + }, nil, length-bitutil.CountSetBits(buf.nullBitmapBytes, 0, length), 0), + ), + } case Boolean: - return newBooleanSeries(name, buf.Data.([]bool), buf.nullBitmapBytes) + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewBooleanBuilder(mem) + defer builder.Release() + builder.AppendValues(buf.Data.([]bool), buildNullBitmapBool(len(buf.Data.([]bool)), buf.nullBitmapBytes)) + return Series{Name: name, Array: builder.NewArray()} case String: - return newStringSeries(name, buf.Data.([]string), buf.nullBitmapBytes) + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewStringBuilder(mem) + defer builder.Release() + builder.AppendValues(buf.Data.([]string), buildNullBitmapBool(len(buf.Data.([]string)), buf.nullBitmapBytes)) + return Series{Name: name, Array: builder.NewArray()} case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano: - return newTimestampSeries(name, buf.DataType, buf.Data, buf.nullBitmapBytes) + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewTimestampBuilder(mem, mapBowToArrowTypes[buf.DataType].(*arrow.TimestampType)) + defer builder.Release() + switch data := buf.Data.(type) { + case []arrow.Timestamp: + builder.AppendValues(data, buildNullBitmapBool(len(data), buf.nullBitmapBytes)) + case []int64: + tsData := make([]arrow.Timestamp, len(data)) + for i, intVal := range data { + tsData[i] = arrow.Timestamp(intVal) + } + builder.AppendValues(tsData, buildNullBitmapBool(len(tsData), buf.nullBitmapBytes)) + default: + panic(fmt.Errorf("unsupported type '%T' for Buffer Data", buf.Data)) + } + return Series{Name: name, Array: builder.NewArray()} default: panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } @@ -154,65 +231,55 @@ func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { } } -func newInt64Series(name string, data []int64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewInt64Data( - array.NewData(mapBowToArrowTypes[Int64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), - } -} - -func newFloat64Series(name string, data []float64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewFloat64Data( - array.NewData(mapBowToArrowTypes[Float64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), +func getBowTypeFromInterfaces(colBasedData []interface{}) (Type, error) { + for _, val := range colBasedData { + if val != nil { + switch val.(type) { + case float64, json.Number: + return Float64, nil + case int, int64: + return Int64, nil + case string: + return String, nil + case bool: + return Boolean, nil + } + } } -} -func newBooleanSeries(name string, data []bool, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewBooleanBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} + return Float64, nil } -func newStringSeries(name string, data []string, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewStringBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} -} +func buildNullBitmapBytes(dataLength int, validityArray interface{}) []byte { + var res []byte + nullBitmapLength := bitutil.CeilByte(dataLength) / 8 -func newTimestampSeries(name string, typ Type, data interface{}, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewTimestampBuilder(mem, mapBowToArrowTypes[typ].(*arrow.TimestampType)) - defer builder.Release() - switch data := data.(type) { - case []arrow.Timestamp: - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - case []int64: - tsData := make([]arrow.Timestamp, len(data)) - for i, intVal := range data { - tsData[i] = arrow.Timestamp(intVal) + switch valid := validityArray.(type) { + case nil: + res = make([]byte, nullBitmapLength) + for i := 0; i < dataLength; i++ { + bitutil.SetBit(res, i) + } + case []bool: + if len(valid) != dataLength { + panic(fmt.Errorf("dataArray and validityArray have different lengths")) + } + res = make([]byte, nullBitmapLength) + for i := 0; i < dataLength; i++ { + if valid[i] { + bitutil.SetBit(res, i) + } } - builder.AppendValues(tsData, buildNullBitmapBool(len(tsData), valid)) + case []byte: + if len(valid) != nullBitmapLength { + panic(fmt.Errorf("dataArray and validityArray have different lengths")) + } + return valid + default: + panic(fmt.Errorf("unsupported type '%T'", valid)) } - return Series{Name: name, Array: builder.NewArray()} + + return res } func buildNullBitmapBool(dataLength int, validityArray interface{}) []bool { @@ -239,22 +306,3 @@ func buildNullBitmapBool(dataLength int, validityArray interface{}) []bool { panic(fmt.Errorf("unsupported type '%T'", valid)) } } - -func getBowTypeFromInterfaces(colBasedData []interface{}) (Type, error) { - for _, val := range colBasedData { - if val != nil { - switch val.(type) { - case float64, json.Number: - return Float64, nil - case int, int64: - return Int64, nil - case string: - return String, nil - case bool: - return Boolean, nil - } - } - } - - return Float64, nil -} From e2f881f28c4f4b84b43c6d83e7d670e27fdf9816 Mon Sep 17 00:00:00 2001 From: agelloz Date: Wed, 4 May 2022 19:11:06 +0200 Subject: [PATCH 23/29] simplifications --- bowconvert.go | 64 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/bowconvert.go b/bowconvert.go index 5fc751a..7aa100c 100644 --- a/bowconvert.go +++ b/bowconvert.go @@ -30,10 +30,7 @@ func ToInt64(input interface{}) (output int64, ok bool) { case float64: return int64(input), true case bool: - if input { - return 1, true - } - return 0, true + return boolToInt64(input) case string: output, err := strconv.ParseInt(input, 10, 64) return output, err == nil @@ -43,6 +40,13 @@ func ToInt64(input interface{}) (output int64, ok bool) { return } +func boolToInt64(input bool) (int64, bool) { + if input { + return 1, true + } + return 0, true +} + // ToFloat64 attempts to convert `input` to float64. // Return also a false boolean if the conversion failed. func ToFloat64(input interface{}) (output float64, ok bool) { @@ -65,10 +69,7 @@ func ToFloat64(input interface{}) (output float64, ok bool) { case float32: return float64(input), true case bool: - if input { - return 1., true - } - return 0., true + return boolToFloat64(input) case string: output, err := strconv.ParseFloat(input, 64) return output, err == nil @@ -78,6 +79,13 @@ func ToFloat64(input interface{}) (output float64, ok bool) { return } +func boolToFloat64(input bool) (float64, bool) { + if input { + return 1., true + } + return 0., true +} + // ToBoolean attempts to convert `input` to bool. // Return also a false boolean if the conversion failed. // In case of numeric type, returns true if the value is non-zero. @@ -116,10 +124,7 @@ func ToBoolean(input interface{}) (output bool, ok bool) { func ToString(input interface{}) (output string, ok bool) { switch input := input.(type) { case bool: - if input { - return "true", true - } - return "false", true + return boolToString(input) case string: return input, true case json.Number: @@ -144,6 +149,13 @@ func ToString(input interface{}) (output string, ok bool) { return } +func boolToString(input bool) (string, bool) { + if input { + return "true", true + } + return "false", true +} + // ToTimestamp returns an arrow.Timestamp value and a bool whether the conversion was successful or not. // String values are first interpreted with strconv.ParseInt. // If it fails, the values are parsed with arrow.TimestampFromString with the `timeUnit` time unit. @@ -167,19 +179,27 @@ func ToTimestamp(input interface{}, timeUnit arrow.TimeUnit) (output arrow.Times case float64: return arrow.Timestamp(input), true case bool: - if input { - return 1, true - } - return 0, true + return boolToTimestamp(input) case string: - output, err := strconv.ParseInt(input, 10, 64) - if err == nil { - return arrow.Timestamp(output), true - } - outputTS, err := arrow.TimestampFromString(input, timeUnit) - return outputTS, err == nil + return stringToTimestamp(input, timeUnit) case arrow.Timestamp: return input, true } return } + +func boolToTimestamp(input bool) (arrow.Timestamp, bool) { + if input { + return 1, true + } + return 0, true +} + +func stringToTimestamp(input string, timeUnit arrow.TimeUnit) (arrow.Timestamp, bool) { + output, err := strconv.ParseInt(input, 10, 64) + if err == nil { + return arrow.Timestamp(output), true + } + outputTS, err := arrow.TimestampFromString(input, timeUnit) + return outputTS, err == nil +} From f165c312e74ca9b689af72eec4f367c8df1b2dad Mon Sep 17 00:00:00 2001 From: agelloz Date: Wed, 4 May 2022 19:23:41 +0200 Subject: [PATCH 24/29] simplifications --- bowbuffer.go | 37 ++--------- bowgetters.go | 8 +-- bowjoin.go | 36 +++++----- bowseries.go | 178 +++++++++++++++++++++++++++++++------------------- 4 files changed, 137 insertions(+), 122 deletions(-) diff --git a/bowbuffer.go b/bowbuffer.go index 1c40ab7..f5ae480 100644 --- a/bowbuffer.go +++ b/bowbuffer.go @@ -39,6 +39,7 @@ func NewBuffer(size int, typ Type) Buffer { return buf } +// Len returns the size of the underlying slice of data in the Buffer. func (b Buffer) Len() int { switch b.DataType { case Int64: @@ -54,6 +55,8 @@ func (b Buffer) Len() int { } } +// SetOrDrop sets the Buffer data at index `i` by attempting to convert `value` to its DataType. +// Sets the value to nil if the conversion failed or if `value` is nil. func (b *Buffer) SetOrDrop(i int, value interface{}) { var valid bool switch b.DataType { @@ -76,6 +79,8 @@ func (b *Buffer) SetOrDrop(i int, value interface{}) { } } +// SetOrDropStrict sets the Buffer data at index `i` by attempting a type assertion of `value` to its DataType. +// Sets the value to nil if the assertion failed or if `value` is nil. func (b *Buffer) SetOrDropStrict(i int, value interface{}) { var valid bool switch b.DataType { @@ -171,38 +176,6 @@ func (b *bow) NewBufferFromCol(colIndex int) Buffer { return res } -func buildNullBitmapBytes(dataLength int, validityArray interface{}) []byte { - var res []byte - nullBitmapLength := bitutil.CeilByte(dataLength) / 8 - - switch valid := validityArray.(type) { - case nil: - res = make([]byte, nullBitmapLength) - for i := 0; i < dataLength; i++ { - bitutil.SetBit(res, i) - } - case []bool: - if len(valid) != dataLength { - panic(fmt.Errorf("dataArray and validityArray have different lengths")) - } - res = make([]byte, nullBitmapLength) - for i := 0; i < dataLength; i++ { - if valid[i] { - bitutil.SetBit(res, i) - } - } - case []byte: - if len(valid) != nullBitmapLength { - panic(fmt.Errorf("dataArray and validityArray have different lengths")) - } - return valid - default: - panic(fmt.Errorf("unsupported type '%T'", valid)) - } - - return res -} - // NewBufferFromInterfaces returns a new typed Buffer with the data represented as a slice of interface{}, with eventual nil values. func NewBufferFromInterfaces(typ Type, data []interface{}) (Buffer, error) { buf := NewBuffer(len(data), typ) diff --git a/bowgetters.go b/bowgetters.go index b0c4035..d6bf9cf 100644 --- a/bowgetters.go +++ b/bowgetters.go @@ -178,8 +178,8 @@ func (b *bow) GetInt64(colIndex, rowIndex int) (int64, bool) { } return 0., false default: - panic(fmt.Errorf("unsupported type '%s'", - b.Schema().Field(colIndex).Type.Name())) + panic(fmt.Errorf("unsupported arrow.DataType '%s'", + b.Schema().Field(colIndex).Type)) } } @@ -241,8 +241,8 @@ func (b *bow) GetFloat64(colIndex, rowIndex int) (float64, bool) { } return 0., false default: - panic(fmt.Sprintf("unsupported type '%s'", - b.Schema().Field(colIndex).Type.Name())) + panic(fmt.Sprintf("unsupported arrow.DataType '%s'", + b.Schema().Field(colIndex).Type)) } } diff --git a/bowjoin.go b/bowjoin.go index 1807d89..77dac71 100644 --- a/bowjoin.go +++ b/bowjoin.go @@ -190,7 +190,7 @@ func innerFillLeftBowCols(newSeries *[]Series, left *bow, newNumRows int, for colIndex := 0; colIndex < left.NumCols(); colIndex++ { buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) - switch left.ColumnType(colIndex) { + switch buf.DataType { case Int64: data := array.NewInt64Data(left.Column(colIndex).Data()) for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { @@ -220,7 +220,7 @@ func innerFillLeftBowCols(newSeries *[]Series, left *bow, newNumRows int, } } default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } (*newSeries)[colIndex] = NewSeriesFromBuffer(left.ColumnName(colIndex), buf) @@ -238,7 +238,7 @@ func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, ne } // Fill common rows from right bow - switch right.ColumnType(rightCol) { + switch buf.DataType { case Int64: data := array.NewInt64Data(right.Column(rightCol).Data()) for rowIndex := 0; rowIndex < newNumRows; rowIndex++ { @@ -268,7 +268,7 @@ func innerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumRows, ne } } default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) @@ -286,7 +286,7 @@ func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uni buf := NewBuffer(newNumRows, left.ColumnType(colIndex)) // Fill rows from left bow - switch left.ColumnType(colIndex) { + switch buf.DataType { case Int64: data := array.NewInt64Data(left.Column(colIndex).Data()) for newRow := 0; left.NumRows() > 0 && newRow < newNumRows; newRow++ { @@ -376,7 +376,7 @@ func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uni } } default: - panic(fmt.Errorf("unsupported type '%v'", left.ColumnType(colIndex))) + panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } // Fill remaining rows from right bow if column is common @@ -387,8 +387,8 @@ func outerFillLeftBowCols(newSeries *[]Series, left, right *bow, newNumRows, uni } for rightRow := 0; isColCommon && rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -416,7 +416,7 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, } buf := NewBuffer(newNumRows, right.ColumnType(rightCol)) - switch right.ColumnType(rightCol) { + switch buf.DataType { case Int64: data := array.NewInt64Data(right.Column(rightCol).Data()) @@ -441,8 +441,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -478,8 +478,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -515,8 +515,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -552,8 +552,8 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, newRow := left.NumRows() + len(commonRows.r) - uniquesLeft for rightRow := 0; rightRow < right.NumRows(); rightRow++ { var isRowCommon bool - for commonRow := 0; commonRow < len(commonRows.r); commonRow++ { - if rightRow == commonRows.r[commonRow] { + for i := 0; i < len(commonRows.r); i++ { + if rightRow == commonRows.r[i] { isRowCommon = true break } @@ -566,7 +566,7 @@ func outerFillRightBowCols(newSeries *[]Series, left, right *bow, newNumCols, } } default: - panic(fmt.Errorf("unsupported type '%v'", right.ColumnType(rightCol))) + panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } (*newSeries)[colIndex] = NewSeriesFromBuffer(right.ColumnName(rightCol), buf) rightCol++ diff --git a/bowseries.go b/bowseries.go index f7e532a..fdedb23 100644 --- a/bowseries.go +++ b/bowseries.go @@ -27,17 +27,43 @@ type Series struct { func NewSeries(name string, typ Type, dataArray interface{}, validityArray interface{}) Series { switch typ { case Int64: - return newInt64Series(name, dataArray.([]int64), - buildNullBitmapBytes(len(dataArray.([]int64)), validityArray)) + length := len(dataArray.([]int64)) + valid := buildNullBitmapBytes(length, validityArray) + return Series{ + Name: name, + Array: array.NewInt64Data( + array.NewData(mapBowToArrowTypes[Int64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(dataArray.([]int64))), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), + } case Float64: - return newFloat64Series(name, dataArray.([]float64), - buildNullBitmapBytes(len(dataArray.([]float64)), validityArray)) + length := len(dataArray.([]float64)) + valid := buildNullBitmapBytes(length, validityArray) + return Series{ + Name: name, + Array: array.NewFloat64Data( + array.NewData(mapBowToArrowTypes[Float64], length, + []*memory.Buffer{ + memory.NewBufferBytes(valid), + memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(dataArray.([]float64))), + }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), + ), + } case Boolean: - return newBooleanSeries(name, dataArray.([]bool), - buildNullBitmapBytes(len(dataArray.([]bool)), validityArray)) + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewBooleanBuilder(mem) + defer builder.Release() + builder.AppendValues(dataArray.([]bool), buildNullBitmapBool(len(dataArray.([]bool)), validityArray)) + return Series{Name: name, Array: builder.NewArray()} case String: - return newStringSeries(name, dataArray.([]string), - buildNullBitmapBytes(len(dataArray.([]string)), validityArray)) + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewStringBuilder(mem) + defer builder.Release() + builder.AppendValues(dataArray.([]string), buildNullBitmapBool(len(dataArray.([]string)), validityArray)) + return Series{Name: name, Array: builder.NewArray()} default: panic(fmt.Errorf("unsupported type '%s'", typ)) } @@ -47,13 +73,41 @@ func NewSeries(name string, typ Type, dataArray interface{}, validityArray inter func NewSeriesFromBuffer(name string, buf Buffer) Series { switch buf.DataType { case Int64: - return newInt64Series(name, buf.Data.([]int64), buf.nullBitmapBytes) + length := len(buf.Data.([]int64)) + return Series{ + Name: name, + Array: array.NewInt64Data( + array.NewData(mapBowToArrowTypes[Int64], length, + []*memory.Buffer{ + memory.NewBufferBytes(buf.nullBitmapBytes), + memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(buf.Data.([]int64))), + }, nil, length-bitutil.CountSetBits(buf.nullBitmapBytes, 0, length), 0), + ), + } case Float64: - return newFloat64Series(name, buf.Data.([]float64), buf.nullBitmapBytes) + length := len(buf.Data.([]float64)) + return Series{ + Name: name, + Array: array.NewFloat64Data( + array.NewData(mapBowToArrowTypes[Float64], length, + []*memory.Buffer{ + memory.NewBufferBytes(buf.nullBitmapBytes), + memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(buf.Data.([]float64))), + }, nil, length-bitutil.CountSetBits(buf.nullBitmapBytes, 0, length), 0), + ), + } case Boolean: - return newBooleanSeries(name, buf.Data.([]bool), buf.nullBitmapBytes) + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewBooleanBuilder(mem) + defer builder.Release() + builder.AppendValues(buf.Data.([]bool), buildNullBitmapBool(len(buf.Data.([]bool)), buf.nullBitmapBytes)) + return Series{Name: name, Array: builder.NewArray()} case String: - return newStringSeries(name, buf.Data.([]string), buf.nullBitmapBytes) + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + builder := array.NewStringBuilder(mem) + defer builder.Release() + builder.AppendValues(buf.Data.([]string), buildNullBitmapBool(len(buf.Data.([]string)), buf.nullBitmapBytes)) + return Series{Name: name, Array: builder.NewArray()} default: panic(fmt.Errorf("unsupported type '%s'", buf.DataType)) } @@ -130,48 +184,55 @@ func NewSeriesFromInterfaces(name string, typ Type, data []interface{}) Series { } } -func newInt64Series(name string, data []int64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewInt64Data( - array.NewData(mapBowToArrowTypes[Int64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), +func getBowTypeFromInterfaces(colBasedData []interface{}) (Type, error) { + for _, val := range colBasedData { + if val != nil { + switch val.(type) { + case float64, json.Number: + return Float64, nil + case int, int64: + return Int64, nil + case string: + return String, nil + case bool: + return Boolean, nil + } + } } -} -func newFloat64Series(name string, data []float64, valid []byte) Series { - length := len(data) - return Series{ - Name: name, - Array: array.NewFloat64Data( - array.NewData(mapBowToArrowTypes[Float64], length, - []*memory.Buffer{ - memory.NewBufferBytes(valid), - memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(data)), - }, nil, length-bitutil.CountSetBits(valid, 0, length), 0), - ), - } + return Float64, nil } -func newBooleanSeries(name string, data []bool, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewBooleanBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} -} +func buildNullBitmapBytes(dataLength int, validityArray interface{}) []byte { + var res []byte + nullBitmapLength := bitutil.CeilByte(dataLength) / 8 -func newStringSeries(name string, data []string, valid []byte) Series { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - builder := array.NewStringBuilder(mem) - defer builder.Release() - builder.AppendValues(data, buildNullBitmapBool(len(data), valid)) - return Series{Name: name, Array: builder.NewArray()} + switch valid := validityArray.(type) { + case nil: + res = make([]byte, nullBitmapLength) + for i := 0; i < dataLength; i++ { + bitutil.SetBit(res, i) + } + case []bool: + if len(valid) != dataLength { + panic(fmt.Errorf("dataArray and validityArray have different lengths")) + } + res = make([]byte, nullBitmapLength) + for i := 0; i < dataLength; i++ { + if valid[i] { + bitutil.SetBit(res, i) + } + } + case []byte: + if len(valid) != nullBitmapLength { + panic(fmt.Errorf("dataArray and validityArray have different lengths")) + } + return valid + default: + panic(fmt.Errorf("unsupported type '%T'", valid)) + } + + return res } func buildNullBitmapBool(dataLength int, validityArray interface{}) []bool { @@ -198,22 +259,3 @@ func buildNullBitmapBool(dataLength int, validityArray interface{}) []bool { panic(fmt.Errorf("unsupported type '%T'", valid)) } } - -func getBowTypeFromInterfaces(colBasedData []interface{}) (Type, error) { - for _, val := range colBasedData { - if val != nil { - switch val.(type) { - case float64, json.Number: - return Float64, nil - case int, int64: - return Int64, nil - case string: - return String, nil - case bool: - return Boolean, nil - } - } - } - - return Float64, nil -} From 583fc96ef0f054db4d4e65125316b2cec0f579ab Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 5 May 2022 09:39:50 +0200 Subject: [PATCH 25/29] update changelog --- CHANGELOG.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ff84ac..1da3107 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,7 @@ UNRELEASED [XXXX-XX-XX] - improve documentation - improve error handling - improve code readability -<<<<<<< HEAD - remove code gen to prepare for Timestamp support -======= ->>>>>>> dev v0.18.0 [2022-02-16] ------------------- From 98407744b3c70e352229422011f4f695392f650a Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 5 May 2022 10:28:26 +0200 Subject: [PATCH 26/29] review Toussaint --- bowjson.go | 8 +------- bowtypes.go | 7 +++++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/bowjson.go b/bowjson.go index 03c82a8..ec66f9e 100644 --- a/bowjson.go +++ b/bowjson.go @@ -95,13 +95,7 @@ func (b *bow) NewValuesFromJSON(jsonB JSONBow) error { */ for fieldIndex, field := range jsonB.Schema.Fields { - ok := false - for _, arrowType := range mapBowToArrowTypes { - if arrowType.Name() == field.Type { - ok = true - } - } - if ok { + if _, ok := mapArrowNameToBowTypes[field.Type]; ok { continue } switch field.Type { diff --git a/bowtypes.go b/bowtypes.go index 5275f86..f8a8be0 100644 --- a/bowtypes.go +++ b/bowtypes.go @@ -38,6 +38,13 @@ var ( Boolean: arrow.FixedWidthTypes.Boolean, String: arrow.BinaryTypes.String, } + mapArrowNameToBowTypes = func() map[string]Type { + res := make(map[string]Type) + for bowType, arrowDataType := range mapBowToArrowTypes { + res[arrowDataType.Name()] = bowType + } + return res + }() allType = func() []Type { res := make([]Type, InputDependent-1) for typ := Type(1); typ < InputDependent; typ++ { From cbfc5cd1b0a6af00333b6062cad2ff352cbc31a0 Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 5 May 2022 10:38:10 +0200 Subject: [PATCH 27/29] review Toussaint --- bowjson.go | 10 +++++----- bowtypes.go | 25 +++++++++++++++---------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/bowjson.go b/bowjson.go index ec66f9e..e5e33cc 100644 --- a/bowjson.go +++ b/bowjson.go @@ -111,10 +111,10 @@ func (b *bow) NewValuesFromJSON(jsonB JSONBow) error { series := make([]Series, len(jsonB.Schema.Fields)) if jsonB.RowBasedData == nil { - for i, field := range jsonB.Schema.Fields { - typ := getBowTypeFromArrowFingerprint(field.Type) + for fieldIndex, field := range jsonB.Schema.Fields { + typ := getBowTypeFromArrowName(field.Type) buf := NewBuffer(0, typ) - series[i] = NewSeriesFromBuffer(field.Name, buf) + series[fieldIndex] = NewSeriesFromBuffer(field.Name, buf) } tmpBow, err := NewBow(series...) @@ -127,8 +127,8 @@ func (b *bow) NewValuesFromJSON(jsonB JSONBow) error { } for fieldIndex, field := range jsonB.Schema.Fields { - fieldType := getBowTypeFromArrowName(field.Type) - buf := NewBuffer(len(jsonB.RowBasedData), fieldType) + typ := getBowTypeFromArrowName(field.Type) + buf := NewBuffer(len(jsonB.RowBasedData), typ) for rowIndex, row := range jsonB.RowBasedData { buf.SetOrDrop(rowIndex, row[field.Name]) } diff --git a/bowtypes.go b/bowtypes.go index f8a8be0..51ff85b 100644 --- a/bowtypes.go +++ b/bowtypes.go @@ -45,6 +45,13 @@ var ( } return res }() + mapArrowFingerprintToBowTypes = func() map[string]Type { + res := make(map[string]Type) + for bowType, arrowDataType := range mapBowToArrowTypes { + res[arrowDataType.Fingerprint()] = bowType + } + return res + }() allType = func() []Type { res := make([]Type, InputDependent-1) for typ := Type(1); typ < InputDependent; typ++ { @@ -89,21 +96,19 @@ func (t Type) String() string { } func getBowTypeFromArrowFingerprint(fingerprint string) Type { - for bowType, arrowType := range mapBowToArrowTypes { - if arrowType.Fingerprint() == fingerprint { - return bowType - } + typ, ok := mapArrowFingerprintToBowTypes[fingerprint] + if !ok { + return Unknown } - return Unknown + return typ } func getBowTypeFromArrowName(name string) Type { - for bowType, arrowType := range mapBowToArrowTypes { - if arrowType.Name() == name { - return bowType - } + typ, ok := mapArrowNameToBowTypes[name] + if !ok { + return Unknown } - return Unknown + return typ } // GetAllTypes returns all Bow types. From 8c440ccd96836e89b39d5d362c6c1347620e8de8 Mon Sep 17 00:00:00 2001 From: agelloz Date: Thu, 5 May 2022 10:51:16 +0200 Subject: [PATCH 28/29] remove unused function --- bowrecord.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/bowrecord.go b/bowrecord.go index 510721c..eec6b67 100644 --- a/bowrecord.go +++ b/bowrecord.go @@ -8,16 +8,6 @@ import ( "github.com/apache/arrow/go/v8/arrow/array" ) -func NewBowFromRecord(record arrow.Record) (Bow, error) { - for _, f := range record.Schema().Fields() { - if getBowTypeFromArrowFingerprint(f.Type.Fingerprint()) == Unknown { - return nil, fmt.Errorf("unsupported type '%s'", f.Type) - } - } - - return &bow{Record: record}, nil -} - func newRecord(metadata Metadata, series ...Series) (arrow.Record, error) { var fields []arrow.Field var arrays []arrow.Array From 86993da6ed7a6ba2d1cc5a5715205cb01d387183 Mon Sep 17 00:00:00 2001 From: agelloz Date: Wed, 11 May 2022 12:36:42 +0200 Subject: [PATCH 29/29] review Toussaint --- XXXexamples_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/XXXexamples_test.go b/XXXexamples_test.go index 9ed640a..08fbce7 100644 --- a/XXXexamples_test.go +++ b/XXXexamples_test.go @@ -53,9 +53,9 @@ func ExampleNewBowFromRowBasedInterfaces() { []string{"time", "int", "float"}, []Type{TimestampMilli, Int64, Float64}, [][]interface{}{ - {"2022-04-27T00:00:00Z", 1, 1.1}, - {"2022-04-27T01:00:00Z", 2, 2.2}, - {"2022-04-27T02:00:00Z", 3, 3.3}, + {"2022-04-27T01:00:00.001Z", 1, 1.1}, + {"2022-04-27T02:00:00.002Z", 2, 2.2}, + {"2022-04-27T03:00:00.003Z", 3, 3.3}, }) if err != nil { panic(err) @@ -64,9 +64,9 @@ func ExampleNewBowFromRowBasedInterfaces() { fmt.Println(b) // Output: // time:timestamp[ms, tz=UTC] int:int64 float:float64 - // 2022-04-27T00:00:00Z 1 1.1 - // 2022-04-27T01:00:00Z 2 2.2 - // 2022-04-27T02:00:00Z 3 3.3 + // 2022-04-27T01:00:00.001Z 1 1.1 + // 2022-04-27T02:00:00.002Z 2 2.2 + // 2022-04-27T03:00:00.003Z 3 3.3 } func ExampleBow_MarshalJSON() {