Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,40 @@ func BenchmarkMarshal(b *testing.B) {
}
}

func BenchmarkUnmarshal_100Records(b *testing.B) {
var sb strings.Builder
sb.WriteString("name,age,phone[]\n")
for range 100 {
sb.WriteString("Alice,30,555-1234~555-5678\n")
}
inputBytes := []byte(sb.String())

for b.Loop() {
var people []BenchmarkPerson
if err := csvpp.Unmarshal(bytes.NewReader(inputBytes), &people); err != nil {
b.Fatal(err)
}
}
}

// BenchmarkMarshal_100Records measures encoding a 100-element slice of
// BenchmarkPerson (including an array-valued field) to CSV++.
func BenchmarkMarshal_100Records(b *testing.B) {
	// Populate the fixture slice outside the timed loop.
	people := make([]BenchmarkPerson, 100)
	for i := range people {
		people[i] = BenchmarkPerson{
			Name:   "Alice",
			Age:    30,
			Phones: []string{"555-1234", "555-5678"},
		}
	}

	for b.Loop() {
		// A fresh buffer per iteration so buffer growth is part of the
		// measured cost, matching real one-shot Marshal usage.
		var buf bytes.Buffer
		if err := csvpp.Marshal(&buf, people); err != nil {
			b.Fatal(err)
		}
	}
}

// splitByRune Benchmark

func BenchmarkSplitByRune(b *testing.B) {
Expand Down
15 changes: 15 additions & 0 deletions csvpputil/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,21 @@ func BenchmarkYAMLArrayWriter_Write(b *testing.B) {
}
}

// BenchmarkYAMLArrayWriter_WriteWithCapacity mirrors BenchmarkYAMLArrayWriter_Write
// but passes a capacity hint sized to the record count, so the two results show
// the allocation savings from pre-sizing the internal buffer.
func BenchmarkYAMLArrayWriter_WriteWithCapacity(b *testing.B) {
	b.ReportAllocs()
	for b.Loop() {
		// The writer buffers everything until Close, so it must be
		// recreated inside the loop.
		writer := csvpputil.NewYAMLArrayWriter(io.Discard, benchHeaders, csvpputil.WithYAMLCapacity(len(benchRecords)))
		for _, rec := range benchRecords {
			if err := writer.Write(rec); err != nil {
				b.Fatal(err)
			}
		}
		if err := writer.Close(); err != nil {
			b.Fatal(err)
		}
	}
}

func BenchmarkJSONArrayWriter_SingleRecord(b *testing.B) {
b.ReportAllocs()
for b.Loop() {
Expand Down
53 changes: 34 additions & 19 deletions csvpputil/yaml_array_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,19 @@ import (
"github.com/osamingo/go-csvpp"
)

// YAMLArrayWriterOption is a functional option for YAMLArrayWriter.
type YAMLArrayWriterOption func(*YAMLArrayWriter)

// WithYAMLCapacity pre-allocates the internal buffer for the expected number of records.
// This reduces memory allocations when the approximate record count is known in advance.
func WithYAMLCapacity(n int) YAMLArrayWriterOption {
	return func(w *YAMLArrayWriter) {
		// Ignore non-positive hints; the writer keeps its default buffer.
		if n <= 0 {
			return
		}
		w.records = make([]yaml.MapSlice, 0, n)
	}
}

// YAMLArrayWriter writes CSV++ records as a YAML array.
// Due to YAML's structure (go-yaml doesn't support streaming array elements),
// records are buffered until Close.
Expand All @@ -19,11 +32,15 @@ type YAMLArrayWriter struct {
}

// NewYAMLArrayWriter creates a new YAMLArrayWriter that writes to w.
func NewYAMLArrayWriter(w io.Writer, headers []*csvpp.ColumnHeader) *YAMLArrayWriter {
return &YAMLArrayWriter{
func NewYAMLArrayWriter(w io.Writer, headers []*csvpp.ColumnHeader, opts ...YAMLArrayWriterOption) *YAMLArrayWriter {
writer := &YAMLArrayWriter{
w: w,
headers: headers,
}
for _, opt := range opts {
opt(writer)
}
return writer
}

// Write adds a single record to the buffer.
Expand Down Expand Up @@ -57,31 +74,29 @@ func (w *YAMLArrayWriter) Close() error {
// The output is a YAML array where each element is a record.
func MarshalYAML(headers []*csvpp.ColumnHeader, records [][]*csvpp.Field) ([]byte, error) {
var buf bytes.Buffer
w := NewYAMLArrayWriter(&buf, headers)

for _, record := range records {
if err := w.Write(record); err != nil {
return nil, err
}
}

if err := w.Close(); err != nil {
if err := encodeYAMLRecords(&buf, headers, records); err != nil {
return nil, err
}

return buf.Bytes(), nil
}

// WriteYAML writes CSV++ records as a YAML array to the provided writer.
// The output is a YAML array where each element is a record.
func WriteYAML(w io.Writer, headers []*csvpp.ColumnHeader, records [][]*csvpp.Field) error {
writer := NewYAMLArrayWriter(w, headers)
return encodeYAMLRecords(w, headers, records)
}

for _, record := range records {
if err := writer.Write(record); err != nil {
return err
}
// encodeYAMLRecords builds the complete MapSlice array with exact allocation
// and encodes it in one shot. This avoids the overhead of the YAMLArrayWriter's
// per-record append growth.
func encodeYAMLRecords(w io.Writer, headers []*csvpp.ColumnHeader, records [][]*csvpp.Field) error {
ms := make([]yaml.MapSlice, len(records))
for i, record := range records {
ms[i] = fieldsToMapSlice(headers, record)
}

return writer.Close()
enc := yaml.NewEncoder(w)
if err := enc.Encode(ms); err != nil {
return err
}
return enc.Close()
}
71 changes: 71 additions & 0 deletions csvpputil/yaml_array_writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,77 @@ func TestYAMLArrayWriter_Write(t *testing.T) {
})
}

// TestYAMLArrayWriter_WriteWithCapacity verifies that the WithYAMLCapacity
// option does not change the writer's output, for both a positive capacity
// hint and the zero-capacity edge case.
func TestYAMLArrayWriter_WriteWithCapacity(t *testing.T) {
	t.Parallel()

	headers := []*csvpp.ColumnHeader{
		{Name: "name", Kind: csvpp.SimpleField},
		{Name: "tags", Kind: csvpp.ArrayField},
	}

	// writeAndDecode runs the full writer lifecycle with the given capacity
	// hint and returns the decoded YAML output.
	writeAndDecode := func(t *testing.T, capacity int, records [][]*csvpp.Field) []map[string]any {
		t.Helper()

		var buf bytes.Buffer
		w := csvpputil.NewYAMLArrayWriter(&buf, headers, csvpputil.WithYAMLCapacity(capacity))

		for _, record := range records {
			if err := w.Write(record); err != nil {
				t.Fatalf("Write() error = %v", err)
			}
		}
		if err := w.Close(); err != nil {
			t.Fatalf("Close() error = %v", err)
		}

		var got []map[string]any
		if err := yaml.Unmarshal(buf.Bytes(), &got); err != nil {
			t.Fatalf("yaml.Unmarshal() error = %v", err)
		}
		return got
	}

	t.Run("success: writer with capacity hint", func(t *testing.T) {
		t.Parallel()

		got := writeAndDecode(t, 2, [][]*csvpp.Field{
			{{Value: "Alice"}, {Values: []string{"go"}}},
			{{Value: "Bob"}, {Values: []string{"rust", "python"}}},
		})

		want := []map[string]any{
			{"name": "Alice", "tags": []any{"go"}},
			{"name": "Bob", "tags": []any{"rust", "python"}},
		}
		if diff := cmp.Diff(want, got); diff != "" {
			t.Errorf("output mismatch (-want +got):\n%s", diff)
		}
	})

	t.Run("success: zero capacity is safe", func(t *testing.T) {
		t.Parallel()

		got := writeAndDecode(t, 0, [][]*csvpp.Field{
			{{Value: "Alice"}, {Values: []string{"go"}}},
		})

		want := []map[string]any{
			{"name": "Alice", "tags": []any{"go"}},
		}
		if diff := cmp.Diff(want, got); diff != "" {
			t.Errorf("output mismatch (-want +got):\n%s", diff)
		}
	})
}

func TestMarshalYAML(t *testing.T) {
t.Parallel()

Expand Down
1 change: 1 addition & 0 deletions export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ var (
FormatColumnHeader = formatColumnHeader
FormatComponentList = formatComponentList
ExtractTagName = extractTagName
CachedTypeInfo = cachedTypeInfo
)
70 changes: 14 additions & 56 deletions marshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,23 +95,23 @@ func MarshalWriter(w *Writer, src any) error {
return fmt.Errorf("csvpp: slice element must be a struct")
}

// Build headers
headers := buildHeaders(elemType)
w.SetHeaders(headers)
// Get cached type info
ti := cachedTypeInfo(elemType)
w.SetHeaders(ti.headers)

// Write headers
if err := w.WriteHeader(); err != nil {
return err
}

// Encode each element
// Encode each element using cached encode field mappings
for i := 0; i < srcVal.Len(); i++ {
elemVal := srcVal.Index(i)
if elemVal.Kind() == reflect.Pointer {
elemVal = elemVal.Elem()
}

record := encodeRecord(elemVal, headers)
record := encodeRecord(elemVal, ti.headers, ti.encodeFields)
if err := w.Write(record); err != nil {
return err
}
Expand All @@ -130,23 +130,14 @@ type fieldMapping struct {

// buildFieldMap creates a mapping between struct fields and headers.
func buildFieldMap(t reflect.Type, headers []*ColumnHeader) []fieldMapping {
ti := cachedTypeInfo(t)
var mappings []fieldMapping

for i := 0; i < t.NumField(); i++ {
field := t.Field(i)
tag := field.Tag.Get("csvpp")
if tag == "" || tag == "-" {
continue
}

// Extract column name from tag (first part is the column name)
tagName := extractTagName(tag)

// Find corresponding column in headers
for _, tn := range ti.tagNames {
for j, h := range headers {
if h.Name == tagName {
if h.Name == tn.tagName {
mappings = append(mappings, fieldMapping{
fieldIndex: i,
fieldIndex: tn.structIndex,
header: h,
columnIndex: j,
})
Expand All @@ -170,31 +161,6 @@ func extractTagName(tag string) string {
return tag
}

// buildHeaders builds column headers from a struct type by reading each
// field's "csvpp" tag. Fields with an empty tag or "-" are skipped; tags that
// fail to parse fall back to a simple field named after the raw tag.
func buildHeaders(t reflect.Type) []*ColumnHeader {
	var headers []*ColumnHeader

	for i := range t.NumField() {
		tag := t.Field(i).Tag.Get("csvpp")
		if tag == "" || tag == "-" {
			continue
		}

		header, err := parseColumnHeader(tag)
		if err != nil {
			// Unparseable tag: degrade gracefully to a simple field.
			header = &ColumnHeader{
				Name: tag,
				Kind: SimpleField,
			}
		}
		headers = append(headers, header)
	}

	return headers
}

// decodeRecord decodes a record into a struct.
func decodeRecord(record []*Field, dst reflect.Value, mappings []fieldMapping) error {
for _, m := range mappings {
Expand Down Expand Up @@ -355,25 +321,17 @@ func decodeStructComponents(components []*Field, dst reflect.Value, headers []*C
}

// encodeRecord encodes a struct to a record.
func encodeRecord(src reflect.Value, headers []*ColumnHeader) []*Field {
func encodeRecord(src reflect.Value, headers []*ColumnHeader, encodeFields []encodeFieldInfo) []*Field {
fields := make([]*Field, 0, len(headers))

fieldIdx := 0
for i := 0; i < src.NumField(); i++ {
structField := src.Type().Field(i)
tag := structField.Tag.Get("csvpp")
if tag == "" || tag == "-" {
continue
}

if fieldIdx >= len(headers) {
for _, ef := range encodeFields {
if ef.headerIndex >= len(headers) {
break
}

field := src.Field(i)
f := encodeField(field, headers[fieldIdx])
field := src.Field(ef.structIndex)
f := encodeField(field, headers[ef.headerIndex])
fields = append(fields, f)
fieldIdx++
}

return fields
Expand Down
Loading
Loading