Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
d2e0ec0
first
Apr 12, 2022
e2cdb5f
first
Apr 13, 2022
7d917db
doc
Apr 25, 2022
d76dffb
corrections
Apr 25, 2022
27f022a
first
Apr 25, 2022
21474d5
Merge branch 'update' into v0.19.0
Apr 26, 2022
c43a4d3
clean up
Apr 28, 2022
ae8973c
Merge branch 'update' into doc
Apr 28, 2022
235dc92
Merge branch 'doc' into v0.19.0
Apr 28, 2022
4ea1ec8
clean up
Apr 28, 2022
4335274
first
Apr 28, 2022
33900cb
corrections
Apr 28, 2022
c304b05
Merge branch 'doc' into gen
Apr 28, 2022
6901596
revert Boolean renaming
Apr 28, 2022
15bf18f
Buffer
Apr 28, 2022
f016a29
first
Apr 28, 2022
e098601
corrections
Apr 28, 2022
3b002c1
corrections
Apr 28, 2022
8f3efd6
Merge branch 'buffers-series' into v0.19.0
Apr 28, 2022
3026ef4
clean up
May 2, 2022
d6515a8
clean up
May 2, 2022
2462da9
Merge branch 'doc' into gen
May 2, 2022
f2bb9ea
Merge branch 'gen' into buffers-series
May 2, 2022
5d89ec5
Merge branch 'buffers-series' into v0.19.0
May 2, 2022
379c90c
changelog
May 2, 2022
795de54
Merge branch 'doc' into gen
May 2, 2022
d61360e
CHANGELOG
May 2, 2022
fa430c9
Merge branch 'gen' into buffers-series
May 2, 2022
107c067
CHANGELOG
May 2, 2022
8793e2a
Merge branch 'buffers-series' into v0.19.0
May 2, 2022
2b956c6
changelog
May 2, 2022
c3a18d5
simplifications
May 4, 2022
b4dc811
simplifications
May 4, 2022
e2f881f
simplifications
May 4, 2022
f165c31
simplifications
May 4, 2022
9d5d780
Merge branch 'buffers-series' into v0.19.0
May 4, 2022
215ebcf
Merge branch 'dev' into gen
May 5, 2022
583fc96
update changelog
May 5, 2022
454487a
simplifications
May 5, 2022
9840774
review Toussaint
May 5, 2022
cbfc5cd
review Toussaint
May 5, 2022
8c440cc
remove unused function
May 5, 2022
7f1e217
review Toussaint
May 5, 2022
aab2076
Merge branch 'dev' into v0.19.0
May 11, 2022
86993da
review Toussaint
May 11, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ UNRELEASED [XXXX-XX-XX]
- improve error handling
- improve code readability
- remove code gen to prepare for Timestamp support
- Types
- add Bow data type in Buffer to decouple Go native types from Arrow types
- add support for Arrow Timestamp type
- Parquet
- use the new arrow/go/v8/parquet package instead of xitongsys/parquet-go

v0.18.0 [2022-02-16]
-------------------
Expand Down
25 changes: 12 additions & 13 deletions XXXexamples_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,25 +49,24 @@ func ExampleNewBowFromColBasedInterfaces() {
}

func ExampleNewBowFromRowBasedInterfaces() {
colNames := []string{"time", "value", "valueFromJSON"}
colTypes := []Type{Int64, Int64, Float64}
rowBasedData := [][]interface{}{
{1, 1, json.Number("1.1")},
{1.2, json.Number("1.2"), 2},
{json.Number("3"), 3, 1.3},
}

b, err := NewBowFromRowBasedInterfaces(colNames, colTypes, rowBasedData)
b, err := NewBowFromRowBasedInterfaces(
[]string{"time", "int", "float"},
[]Type{TimestampMilli, Int64, Float64},
[][]interface{}{
{"2022-04-27T01:00:00.001Z", 1, 1.1},
{"2022-04-27T02:00:00.002Z", 2, 2.2},
{"2022-04-27T03:00:00.003Z", 3, 3.3},
})
if err != nil {
panic(err)
}

fmt.Println(b)
// Output:
// time:int64 value:int64 valueFromJSON:float64
// 1 1 1.1
// 1 <nil> 2
// 3 3 1.3
// time:timestamp[ms, tz=UTC] int:int64 float:float64
// 2022-04-27T01:00:00.001Z 1 1.1
// 2022-04-27T02:00:00.002Z 2 2.2
// 2022-04-27T03:00:00.003Z 3 3.3
}

func ExampleBow_MarshalJSON() {
Expand Down
2 changes: 0 additions & 2 deletions bow.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"errors"
"fmt"
"reflect"
"time"

"github.com/apache/arrow/go/v8/arrow"
"github.com/apache/arrow/go/v8/arrow/array"
Expand Down Expand Up @@ -88,7 +87,6 @@ type Bow interface {
UnmarshalJSON(data []byte) error
NewValuesFromJSON(jsonB JSONBow) error
WriteParquet(path string, verbose bool) error
GetParquetMetaColTimeUnit(colIndex int) (time.Duration, error)
}

type bow struct {
Expand Down
15 changes: 15 additions & 0 deletions bowappend.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,21 @@ func AppendBows(bows ...Bow) (Bow, error) {
builder.AppendValues(v, valid)
}
newArray = builder.NewArray()
case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano:
builder := array.NewTimestampBuilder(mem, mapBowToArrowTypes[refType].(*arrow.TimestampType))
builder.Resize(numRows)
for _, b := range bows {
if colType := b.ColumnType(colIndex); colType != refType {
return nil, fmt.Errorf(
"incompatible types '%s' and '%s'", refType, colType)
}
data := b.(*bow).Column(colIndex).Data()
arr := array.NewTimestampData(data)
v := timestampValues(arr)
valid := getValiditySlice(arr)
builder.AppendValues(v, valid)
}
newArray = builder.NewArray()
default:
return nil, fmt.Errorf("unsupported type '%s'", refType)
}
Expand Down
30 changes: 29 additions & 1 deletion bowbuffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"sort"

"github.com/apache/arrow/go/v8/arrow"
"github.com/apache/arrow/go/v8/arrow/array"
"github.com/apache/arrow/go/v8/arrow/bitutil"
)
Expand Down Expand Up @@ -33,6 +34,8 @@ func NewBuffer(size int, typ Type) Buffer {
buf.Data = make([]bool, size)
case String:
buf.Data = make([]string, size)
case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano:
buf.Data = make([]arrow.Timestamp, size)
default:
panic(fmt.Errorf("unsupported type '%s'", typ))
}
Expand All @@ -50,6 +53,8 @@ func (b Buffer) Len() int {
return len(b.Data.([]bool))
case String:
return len(b.Data.([]string))
case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano:
return len(b.Data.([]arrow.Timestamp))
default:
panic(fmt.Errorf("unsupported type '%s'", b.DataType))
}
Expand All @@ -68,6 +73,8 @@ func (b *Buffer) SetOrDrop(i int, value interface{}) {
b.Data.([]bool)[i], valid = Boolean.Convert(value).(bool)
case String:
b.Data.([]string)[i], valid = String.Convert(value).(string)
case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano:
b.Data.([]arrow.Timestamp)[i], valid = b.DataType.Convert(value).(arrow.Timestamp)
default:
panic(fmt.Errorf("unsupported type '%s'", b.DataType))
}
Expand All @@ -92,6 +99,16 @@ func (b *Buffer) SetOrDropStrict(i int, value interface{}) {
b.Data.([]bool)[i], valid = value.(bool)
case String:
b.Data.([]string)[i], valid = value.(string)
case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano:
valid = true
switch value := value.(type) {
case arrow.Timestamp:
b.Data.([]arrow.Timestamp)[i] = value
case int64:
b.Data.([]int64)[i] = value
default:
valid = false
}
default:
panic(fmt.Errorf("unsupported type '%s'", b.DataType))
}
Expand All @@ -103,7 +120,6 @@ func (b *Buffer) SetOrDropStrict(i int, value interface{}) {
}
}

// GetValue gets the value at index `i` from the Buffer
func (b *Buffer) GetValue(i int) interface{} {
if bitutil.BitIsNotSet(b.nullBitmapBytes, i) {
return nil
Expand All @@ -118,6 +134,8 @@ func (b *Buffer) GetValue(i int) interface{} {
return b.Data.([]bool)[i]
case String:
return b.Data.([]string)[i]
case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano:
return b.Data.([]arrow.Timestamp)[i]
default:
panic(fmt.Errorf("unsupported type '%s'", b.DataType))
}
Expand All @@ -133,6 +151,8 @@ func (b Buffer) Less(i, j int) bool {
return b.Data.([]string)[i] < b.Data.([]string)[j]
case Boolean:
return !b.Data.([]bool)[i] && b.Data.([]bool)[j]
case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano:
return b.Data.([]arrow.Timestamp)[i] < b.Data.([]arrow.Timestamp)[j]
default:
panic(fmt.Errorf("unsupported type '%s'", b.DataType))
}
Expand Down Expand Up @@ -170,9 +190,17 @@ func (b *bow) NewBufferFromCol(colIndex int) Buffer {
copy(nullBitmapBytesCopy, nullBitmapBytes)
res.Data = stringValues(arr)
res.nullBitmapBytes = nullBitmapBytesCopy
case TimestampSec, TimestampMilli, TimestampMicro, TimestampNano:
arr := array.NewTimestampData(data)
nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8]
nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes))
copy(nullBitmapBytesCopy, nullBitmapBytes)
res.Data = timestampValues(arr)
res.nullBitmapBytes = nullBitmapBytesCopy
default:
panic(fmt.Errorf("unsupported type '%s'", b.ColumnType(colIndex)))
}

return res
}

Expand Down
94 changes: 82 additions & 12 deletions bowconvert.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"encoding/json"
"fmt"
"strconv"

"github.com/apache/arrow/go/v8/arrow"
)

// ToInt64 attempts to convert `input` to int64.
Expand All @@ -28,17 +30,23 @@ func ToInt64(input interface{}) (output int64, ok bool) {
case float64:
return int64(input), true
case bool:
if input {
return 1, true
}
return 0, true
return boolToInt64(input)
case string:
output, err := strconv.ParseInt(input, 10, 64)
return output, err == nil
case arrow.Timestamp:
return int64(input), true
}
return
}

func boolToInt64(input bool) (int64, bool) {
if input {
return 1, true
}
return 0, true
}

// ToFloat64 attempts to convert `input` to float64.
// Return also a false boolean if the conversion failed.
func ToFloat64(input interface{}) (output float64, ok bool) {
Expand All @@ -61,17 +69,23 @@ func ToFloat64(input interface{}) (output float64, ok bool) {
case float32:
return float64(input), true
case bool:
if input {
return 1., true
}
return 0., true
return boolToFloat64(input)
case string:
output, err := strconv.ParseFloat(input, 64)
return output, err == nil
case arrow.Timestamp:
return float64(input), true
}
return
}

func boolToFloat64(input bool) (float64, bool) {
if input {
return 1., true
}
return 0., true
}

// ToBoolean attempts to convert `input` to bool.
// Return also a false boolean if the conversion failed.
// In case of numeric type, returns true if the value is non-zero.
Expand Down Expand Up @@ -99,6 +113,8 @@ func ToBoolean(input interface{}) (output bool, ok bool) {
return input != 0., true
case float64:
return input != 0., true
case arrow.Timestamp:
return input != 0, true
}
return
}
Expand All @@ -108,10 +124,7 @@ func ToBoolean(input interface{}) (output bool, ok bool) {
func ToString(input interface{}) (output string, ok bool) {
switch input := input.(type) {
case bool:
if input {
return "true", true
}
return "false", true
return boolToString(input)
case string:
return input, true
case json.Number:
Expand All @@ -130,6 +143,63 @@ func ToString(input interface{}) (output string, ok bool) {
return fmt.Sprintf("%f", input), true
case float64:
return fmt.Sprintf("%f", input), true
case arrow.Timestamp:
return strconv.Itoa(int(input)), true
}
return
}

func boolToString(input bool) (string, bool) {
if input {
return "true", true
}
return "false", true
}

// ToTimestamp returns an arrow.Timestamp value and a bool whether the conversion was successful or not.
// String values are first interpreted with strconv.ParseInt.
// If it fails, the values are parsed with arrow.TimestampFromString with the `timeUnit` time unit.
func ToTimestamp(input interface{}, timeUnit arrow.TimeUnit) (output arrow.Timestamp, ok bool) {
switch input := input.(type) {
case json.Number:
output, err := input.Int64()
return arrow.Timestamp(output), err == nil
case int:
return arrow.Timestamp(input), true
case int8:
return arrow.Timestamp(input), true
case int16:
return arrow.Timestamp(input), true
case int32:
return arrow.Timestamp(input), true
case int64:
return arrow.Timestamp(input), true
case float32:
return arrow.Timestamp(input), true
case float64:
return arrow.Timestamp(input), true
case bool:
return boolToTimestamp(input)
case string:
return stringToTimestamp(input, timeUnit)
case arrow.Timestamp:
return input, true
}
return
}

func boolToTimestamp(input bool) (arrow.Timestamp, bool) {
if input {
return 1, true
}
return 0, true
}

func stringToTimestamp(input string, timeUnit arrow.TimeUnit) (arrow.Timestamp, bool) {
output, err := strconv.ParseInt(input, 10, 64)
if err == nil {
return arrow.Timestamp(output), true
}
outputTS, err := arrow.TimestampFromString(input, timeUnit)
return outputTS, err == nil
}
Loading