diff --git a/benchmark_test.go b/benchmark_test.go index 25da4d8..24c41a9 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -332,6 +332,40 @@ func BenchmarkMarshal(b *testing.B) { } } +func BenchmarkUnmarshal_100Records(b *testing.B) { + var sb strings.Builder + sb.WriteString("name,age,phone[]\n") + for range 100 { + sb.WriteString("Alice,30,555-1234~555-5678\n") + } + inputBytes := []byte(sb.String()) + + for b.Loop() { + var people []BenchmarkPerson + if err := csvpp.Unmarshal(bytes.NewReader(inputBytes), &people); err != nil { + b.Fatal(err) + } + } +} + +func BenchmarkMarshal_100Records(b *testing.B) { + people := make([]BenchmarkPerson, 100) + for i := range people { + people[i] = BenchmarkPerson{ + Name: "Alice", + Age: 30, + Phones: []string{"555-1234", "555-5678"}, + } + } + + for b.Loop() { + var buf bytes.Buffer + if err := csvpp.Marshal(&buf, people); err != nil { + b.Fatal(err) + } + } +} + // splitByRune Benchmark func BenchmarkSplitByRune(b *testing.B) { diff --git a/csvpputil/benchmark_test.go b/csvpputil/benchmark_test.go index 01ce064..d2e9718 100644 --- a/csvpputil/benchmark_test.go +++ b/csvpputil/benchmark_test.go @@ -98,6 +98,21 @@ func BenchmarkYAMLArrayWriter_Write(b *testing.B) { } } +func BenchmarkYAMLArrayWriter_WriteWithCapacity(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + w := csvpputil.NewYAMLArrayWriter(io.Discard, benchHeaders, csvpputil.WithYAMLCapacity(len(benchRecords))) + for _, record := range benchRecords { + if err := w.Write(record); err != nil { + b.Fatal(err) + } + } + if err := w.Close(); err != nil { + b.Fatal(err) + } + } +} + func BenchmarkJSONArrayWriter_SingleRecord(b *testing.B) { b.ReportAllocs() for b.Loop() { diff --git a/csvpputil/yaml_array_writer.go b/csvpputil/yaml_array_writer.go index 275cb7e..38a7343 100644 --- a/csvpputil/yaml_array_writer.go +++ b/csvpputil/yaml_array_writer.go @@ -8,6 +8,19 @@ import ( "github.com/osamingo/go-csvpp" ) +// YAMLArrayWriterOption is a functional option for YAMLArrayWriter. +type YAMLArrayWriterOption func(*YAMLArrayWriter) + +// WithYAMLCapacity pre-allocates the internal buffer for the expected number of records. +// This reduces memory allocations when the approximate record count is known in advance. +func WithYAMLCapacity(n int) YAMLArrayWriterOption { + return func(w *YAMLArrayWriter) { + if n > 0 { + w.records = make([]yaml.MapSlice, 0, n) + } + } +} + // YAMLArrayWriter writes CSV++ records as a YAML array. // Due to YAML's structure (go-yaml doesn't support streaming array elements), // records are buffered until Close. @@ -19,11 +32,15 @@ type YAMLArrayWriter struct { } // NewYAMLArrayWriter creates a new YAMLArrayWriter that writes to w. -func NewYAMLArrayWriter(w io.Writer, headers []*csvpp.ColumnHeader) *YAMLArrayWriter { - return &YAMLArrayWriter{ +func NewYAMLArrayWriter(w io.Writer, headers []*csvpp.ColumnHeader, opts ...YAMLArrayWriterOption) *YAMLArrayWriter { + writer := &YAMLArrayWriter{ w: w, headers: headers, } + for _, opt := range opts { + opt(writer) + } + return writer } // Write adds a single record to the buffer. @@ -57,31 +74,29 @@ func (w *YAMLArrayWriter) Close() error { // The output is a YAML array where each element is a record. func MarshalYAML(headers []*csvpp.ColumnHeader, records [][]*csvpp.Field) ([]byte, error) { var buf bytes.Buffer - w := NewYAMLArrayWriter(&buf, headers) - - for _, record := range records { - if err := w.Write(record); err != nil { - return nil, err - } - } - - if err := w.Close(); err != nil { + if err := encodeYAMLRecords(&buf, headers, records); err != nil { return nil, err } - return buf.Bytes(), nil } // WriteYAML writes CSV++ records as a YAML array to the provided writer. // The output is a YAML array where each element is a record. func WriteYAML(w io.Writer, headers []*csvpp.ColumnHeader, records [][]*csvpp.Field) error { - writer := NewYAMLArrayWriter(w, headers) + return encodeYAMLRecords(w, headers, records) +} - for _, record := range records { - if err := writer.Write(record); err != nil { - return err - } +// encodeYAMLRecords builds the complete MapSlice array with exact allocation +// and encodes it in one shot. This avoids the overhead of the YAMLArrayWriter's +// per-record append growth. +func encodeYAMLRecords(w io.Writer, headers []*csvpp.ColumnHeader, records [][]*csvpp.Field) error { + ms := make([]yaml.MapSlice, len(records)) + for i, record := range records { + ms[i] = fieldsToMapSlice(headers, record) } - - return writer.Close() + enc := yaml.NewEncoder(w) + if err := enc.Encode(ms); err != nil { + return err + } + return enc.Close() } diff --git a/csvpputil/yaml_array_writer_test.go b/csvpputil/yaml_array_writer_test.go index bd6b571..78b9482 100644 --- a/csvpputil/yaml_array_writer_test.go +++ b/csvpputil/yaml_array_writer_test.go @@ -118,6 +118,77 @@ func TestYAMLArrayWriter_Write(t *testing.T) { }) } +func TestYAMLArrayWriter_WriteWithCapacity(t *testing.T) { + t.Parallel() + + headers := []*csvpp.ColumnHeader{ + {Name: "name", Kind: csvpp.SimpleField}, + {Name: "tags", Kind: csvpp.ArrayField}, + } + + t.Run("success: writer with capacity hint", func(t *testing.T) { + t.Parallel() + + var buf bytes.Buffer + w := csvpputil.NewYAMLArrayWriter(&buf, headers, csvpputil.WithYAMLCapacity(2)) + + records := [][]*csvpp.Field{ + {{Value: "Alice"}, {Values: []string{"go"}}}, + {{Value: "Bob"}, {Values: []string{"rust", "python"}}}, + } + + for _, record := range records { + if err := w.Write(record); err != nil { + t.Fatalf("Write() error = %v", err) + } + } + + if err := w.Close(); err != nil { + t.Fatalf("Close() error = %v", err) + } + + var got []map[string]any + if err := yaml.Unmarshal(buf.Bytes(), &got); err != nil { + t.Fatalf("yaml.Unmarshal() error = %v", err) + } + + want := []map[string]any{ + {"name": "Alice", "tags": []any{"go"}}, + {"name": "Bob", "tags": []any{"rust", "python"}}, + } + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("output mismatch (-want +got):\n%s", diff) + } + }) + + t.Run("success: zero capacity is safe", func(t *testing.T) { + t.Parallel() + + var buf bytes.Buffer + w := csvpputil.NewYAMLArrayWriter(&buf, headers, csvpputil.WithYAMLCapacity(0)) + + if err := w.Write([]*csvpp.Field{{Value: "Alice"}, {Values: []string{"go"}}}); err != nil { + t.Fatalf("Write() error = %v", err) + } + + if err := w.Close(); err != nil { + t.Fatalf("Close() error = %v", err) + } + + var got []map[string]any + if err := yaml.Unmarshal(buf.Bytes(), &got); err != nil { + t.Fatalf("yaml.Unmarshal() error = %v", err) + } + + want := []map[string]any{ + {"name": "Alice", "tags": []any{"go"}}, + } + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("output mismatch (-want +got):\n%s", diff) + } + }) +} + func TestMarshalYAML(t *testing.T) { t.Parallel() diff --git a/export_test.go b/export_test.go index 25ea87f..cdcdbbc 100644 --- a/export_test.go +++ b/export_test.go @@ -13,4 +13,5 @@ var ( FormatColumnHeader = formatColumnHeader FormatComponentList = formatComponentList ExtractTagName = extractTagName + CachedTypeInfo = cachedTypeInfo ) diff --git a/marshal.go b/marshal.go index 35523d6..759134c 100644 --- a/marshal.go +++ b/marshal.go @@ -95,23 +95,23 @@ func MarshalWriter(w *Writer, src any) error { return fmt.Errorf("csvpp: slice element must be a struct") } - // Build headers - headers := buildHeaders(elemType) - w.SetHeaders(headers) + // Get cached type info + ti := cachedTypeInfo(elemType) + w.SetHeaders(ti.headers) // Write headers if err := w.WriteHeader(); err != nil { return err } - // Encode each element + // Encode each element using cached encode field mappings for i := 0; i < srcVal.Len(); i++ { elemVal := srcVal.Index(i) if elemVal.Kind() == reflect.Pointer { elemVal = elemVal.Elem() } - record := encodeRecord(elemVal, headers) + record := encodeRecord(elemVal, ti.headers, ti.encodeFields) if err := w.Write(record); err != nil { return err } @@ -130,23 +130,14 @@ type fieldMapping struct { // buildFieldMap creates a mapping between struct fields and headers. func buildFieldMap(t reflect.Type, headers []*ColumnHeader) []fieldMapping { + ti := cachedTypeInfo(t) var mappings []fieldMapping - for i := 0; i < t.NumField(); i++ { - field := t.Field(i) - tag := field.Tag.Get("csvpp") - if tag == "" || tag == "-" { - continue - } - - // Extract column name from tag (first part is the column name) - tagName := extractTagName(tag) - - // Find corresponding column in headers + for _, tn := range ti.tagNames { for j, h := range headers { - if h.Name == tagName { + if h.Name == tn.tagName { mappings = append(mappings, fieldMapping{ - fieldIndex: i, + fieldIndex: tn.structIndex, header: h, columnIndex: j, }) @@ -170,31 +161,6 @@ func extractTagName(tag string) string { return tag } -// buildHeaders builds headers from a struct. -func buildHeaders(t reflect.Type) []*ColumnHeader { - var headers []*ColumnHeader - - for i := 0; i < t.NumField(); i++ { - field := t.Field(i) - tag := field.Tag.Get("csvpp") - if tag == "" || tag == "-" { - continue - } - - h, err := parseColumnHeader(tag) - if err != nil { - // Treat as simple field if error - h = &ColumnHeader{ - Name: tag, - Kind: SimpleField, - } - } - headers = append(headers, h) - } - - return headers -} - // decodeRecord decodes a record into a struct. func decodeRecord(record []*Field, dst reflect.Value, mappings []fieldMapping) error { for _, m := range mappings { @@ -355,25 +321,17 @@ func decodeStructComponents(components []*Field, dst reflect.Value, headers []*C } // encodeRecord encodes a struct to a record. -func encodeRecord(src reflect.Value, headers []*ColumnHeader) []*Field { +func encodeRecord(src reflect.Value, headers []*ColumnHeader, encodeFields []encodeFieldInfo) []*Field { fields := make([]*Field, 0, len(headers)) - fieldIdx := 0 - for i := 0; i < src.NumField(); i++ { - structField := src.Type().Field(i) - tag := structField.Tag.Get("csvpp") - if tag == "" || tag == "-" { - continue - } - - if fieldIdx >= len(headers) { + for _, ef := range encodeFields { + if ef.headerIndex >= len(headers) { break } - field := src.Field(i) - f := encodeField(field, headers[fieldIdx]) + field := src.Field(ef.structIndex) + f := encodeField(field, headers[ef.headerIndex]) fields = append(fields, f) - fieldIdx++ } return fields diff --git a/type_cache.go b/type_cache.go new file mode 100644 index 0000000..33ce054 --- /dev/null +++ b/type_cache.go @@ -0,0 +1,91 @@ +package csvpp + +import ( + "reflect" + "sync" +) + +// typeInfo holds cached reflection data for a struct type. +// It is computed once per reflect.Type and reused across all +// Marshal/Unmarshal calls for that type. +// All fields are immutable after creation, making it safe for concurrent access. +// The source of truth is struct tags, which are compile-time constants, +// so the cached values can never become stale. +// Callers must NOT mutate the returned slices (headers, encodeFields, tagNames). +type typeInfo struct { + headers []*ColumnHeader + encodeFields []encodeFieldInfo + tagNames []tagNameInfo +} + +// encodeFieldInfo maps a struct field index to its position in the headers slice. +// This eliminates per-record tag scanning in encodeRecord. +type encodeFieldInfo struct { + structIndex int // index in the struct (for reflect.Value.Field) + headerIndex int // index in the headers slice +} + +// tagNameInfo holds the extracted tag name for a struct field, +// used by buildFieldMap to match against CSV headers without re-parsing tags. +type tagNameInfo struct { + structIndex int + tagName string +} + +// typeInfoCache caches typeInfo by reflect.Type. +// This follows the same pattern as encoding/json's fieldCache. +// Entries are never evicted; this is safe because the key space (distinct struct types +// used with Marshal/Unmarshal) is bounded by the application's type definitions. +var typeInfoCache sync.Map // map[reflect.Type]*typeInfo + +// cachedTypeInfo returns the cached typeInfo for the given type, +// computing it on first access. +func cachedTypeInfo(t reflect.Type) *typeInfo { + if ti, ok := typeInfoCache.Load(t); ok { + return ti.(*typeInfo) + } + ti, _ := typeInfoCache.LoadOrStore(t, newTypeInfo(t)) + return ti.(*typeInfo) +} + +// newTypeInfo computes typeInfo for a struct type. +// This consolidates the tag-scanning logic from buildHeaders, +// buildFieldMap, and encodeRecord into a single pass. +func newTypeInfo(t reflect.Type) *typeInfo { + ti := &typeInfo{} + + headerIdx := 0 + for i := range t.NumField() { + field := t.Field(i) + tag := field.Tag.Get("csvpp") + if tag == "" || tag == "-" { + continue + } + + // Parse header from tag. + h, err := parseColumnHeader(tag) + if err != nil { + h = &ColumnHeader{ + Name: tag, + Kind: SimpleField, + } + } + ti.headers = append(ti.headers, h) + + // Store encode mapping. + ti.encodeFields = append(ti.encodeFields, encodeFieldInfo{ + structIndex: i, + headerIndex: headerIdx, + }) + + // Store tag name for decode mapping. + ti.tagNames = append(ti.tagNames, tagNameInfo{ + structIndex: i, + tagName: extractTagName(tag), + }) + + headerIdx++ + } + + return ti +}