Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![Go Report Card](https://goreportcard.com/badge/github.com/osamingo/go-csvpp)](https://goreportcard.com/report/github.com/osamingo/go-csvpp)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

A Go implementation of the [IETF CSV++ specification](https://datatracker.ietf.org/doc/draft-mscaldas-csvpp/) (draft-mscaldas-csvpp-01).
A Go implementation of the [IETF CSV++ specification](https://datatracker.ietf.org/doc/draft-mscaldas-csvpp/02/) (draft-mscaldas-csvpp-02).

CSV++ extends traditional CSV to support **arrays** and **structured fields** within cells, enabling complex data representation while maintaining CSV's simplicity.

Expand Down Expand Up @@ -184,9 +184,12 @@ CSV++ supports four field types in headers:

### Default Delimiters

- Array delimiter: `~` (tilde)
- Array delimiter: `~` (tilde) — this default applies only to top-level arrays
- Component delimiter: `^` (caret)

Per draft-02, the default tilde for empty brackets (`[]`) applies only to first-level arrays.
Nested arrays **must** specify an explicit delimiter (e.g., `values[;]`).

Custom delimiters can be specified in the header:
- `phone[|]` - uses `|` as array delimiter
- `geo;(lat;lon)` - uses `;` as component delimiter
Expand Down Expand Up @@ -307,7 +310,13 @@ if csvpp.HasFormulaPrefix(value) {
## Specification

This implementation follows the IETF CSV++ specification:
- [draft-mscaldas-csvpp-01](https://datatracker.ietf.org/doc/draft-mscaldas-csvpp/)
- [draft-mscaldas-csvpp-02](https://datatracker.ietf.org/doc/draft-mscaldas-csvpp/02/)

### Conformance Notes

The following draft-02 features are **not yet implemented**:

- **Leaf-only quoting validation**: draft-02 requires that RFC 4180 double-quote quoting be applied only to leaf elements (atomic values); quoting a non-leaf value (an entire array or structured field) is invalid per the specification. This library delegates CSV-level quoting to `encoding/csv`, which processes quotes before CSV++ delimiter splitting, so non-leaf quoting cannot be detected or rejected at the CSV++ layer. Well-formed input is still parsed correctly; only the rejection of invalid non-leaf quoting is missing.

## License

Expand Down
7 changes: 5 additions & 2 deletions doc.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Package csvpp implements the IETF CSV++ specification (draft-mscaldas-csvpp-01).
// Package csvpp implements the IETF CSV++ specification (draft-mscaldas-csvpp-02).
//
// CSV++ extends traditional CSV to support arrays and structured fields within cells,
// enabling complex data representation while maintaining CSV's simplicity.
Expand All @@ -9,10 +9,13 @@
// CSV++ introduces four field types beyond simple text values:
//
// - Simple: "name" - plain text value
// - Array: "tags[]" - multiple values separated by a delimiter (default: ~)
// - Array: "tags[]" - multiple values separated by a delimiter (default: ~ at top level only)
// - Structured: "geo(lat^lon)" - named components separated by a delimiter (default: ^)
// - ArrayStructured: "addresses[](street^city)" - array of structured values
//
// Per draft-02, the default tilde (~) delimiter for empty brackets applies only to
// top-level (first-level) arrays. Nested arrays MUST explicitly specify a delimiter.
//
// These field types are represented by the [FieldKind] constants:
// [SimpleField], [ArrayField], [StructuredField], and [ArrayStructuredField].
//
Expand Down
2 changes: 1 addition & 1 deletion export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ var (
ParseColumnHeaderWithDepth = parseColumnHeaderWithDepth
ParseHeaderRecordWithMaxDepth = parseHeaderRecordWithMaxDepth
ParseName = parseName
ParseArrayDelimiter = parseArrayDelimiter
ParseArrayDelimiterWithDepth = parseArrayDelimiterWithDepth
IsFieldChar = isFieldChar
SplitByRune = splitByRune
SplitByDelimiter = splitByDelimiter
Expand Down
15 changes: 10 additions & 5 deletions header.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func parseColumnHeaderWithDepth(s string, depth, maxDepth int) (*ColumnHeader, e
// 2. Extract array delimiter if "[" is present
// ABNF: array-field = name "[" [delimiter] "]"
if strings.HasPrefix(remaining, "[") {
delim, rest, err := parseArrayDelimiter(remaining)
delim, rest, err := parseArrayDelimiterWithDepth(remaining, depth)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -112,10 +112,12 @@ func isFieldChar(r rune) bool {
r == '_' || r == '-'
}

// parseArrayDelimiter extracts the "[" delimiter "]" part per IETF CSV++ Section 2.2.2.
// parseArrayDelimiterWithDepth extracts the "[" delimiter "]" part per IETF CSV++ Section 2.2.2.
// ABNF: array-field = name "[" [delimiter] "]"
// If no delimiter is specified, DefaultArrayDelimiter (~) is used.
func parseArrayDelimiter(s string) (delim rune, rest string, err error) {
// If no delimiter is specified at the top level (depth == 0), DefaultArrayDelimiter (~) is used.
// Per draft-02, nested arrays (depth > 0) MUST explicitly specify a delimiter;
// empty brackets are invalid because the default tilde is already consumed by the outer level.
func parseArrayDelimiterWithDepth(s string, depth int) (delim rune, rest string, err error) {
if !strings.HasPrefix(s, "[") {
return 0, s, nil
}
Expand All @@ -133,7 +135,10 @@ func parseArrayDelimiter(s string) (delim rune, rest string, err error) {
rest = after

if raw == "" {
// Use default delimiter
if depth > 0 {
return 0, "", fmt.Errorf("%w: nested arrays must specify an explicit delimiter (empty brackets are invalid at depth > 0)", ErrInvalidHeader)
}
// Use default delimiter only at top level
delim = DefaultArrayDelimiter
} else {
r, size := utf8.DecodeRuneInString(raw)
Expand Down
58 changes: 51 additions & 7 deletions header_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,24 @@ func TestParseColumnHeader(t *testing.T) {
},
},
},
{
name: "success: nested array with explicit delimiter",
input: "data(type^values[;])",
want: &csvpp.ColumnHeader{
Name: "data",
Kind: csvpp.StructuredField,
ComponentDelimiter: csvpp.DefaultComponentDelimiter,
Components: []*csvpp.ColumnHeader{
{Name: "type", Kind: csvpp.SimpleField},
{Name: "values", Kind: csvpp.ArrayField, ArrayDelimiter: ';'},
},
},
},
{
name: "error: nested array with empty brackets",
input: "data(type^values[])",
wantErr: true,
},
{
name: "error: empty string",
input: "",
Expand Down Expand Up @@ -242,48 +260,74 @@ func TestParseName(t *testing.T) {
}
}

func TestParseArrayDelimiter(t *testing.T) {
func TestParseArrayDelimiterWithDepth(t *testing.T) {
t.Parallel()

tests := []struct {
name string
input string
depth int
wantDelim rune
wantRest string
wantErr bool
}{
{
name: "success: default delimiter",
name: "success: default delimiter at top level",
input: "[]",
depth: 0,
wantDelim: csvpp.DefaultArrayDelimiter,
wantRest: "",
},
{
name: "success: custom delimiter",
name: "success: custom delimiter at top level",
input: "[|]",
depth: 0,
wantDelim: '|',
wantRest: "",
},
{
name: "success: custom delimiter at nested level",
input: "[;]",
depth: 1,
wantDelim: ';',
wantRest: "",
},
{
name: "success: with rest",
input: "[](lat^lon)",
depth: 0,
wantDelim: csvpp.DefaultArrayDelimiter,
wantRest: "(lat^lon)",
},
{
name: "success: no bracket",
input: "(lat^lon)",
depth: 0,
wantDelim: 0,
wantRest: "(lat^lon)",
},
{
name: "error: missing closing bracket",
input: "[|",
depth: 0,
wantErr: true,
},
{
name: "error: multiple characters as delimiter",
input: "[||]",
depth: 0,
wantErr: true,
},
{
name: "error: empty brackets at nested level",
input: "[]",
depth: 1,
wantErr: true,
},
{
name: "error: empty brackets at deeply nested level",
input: "[]",
depth: 3,
wantErr: true,
},
}
Expand All @@ -292,19 +336,19 @@ func TestParseArrayDelimiter(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

gotDelim, gotRest, err := csvpp.ParseArrayDelimiter(tt.input)
gotDelim, gotRest, err := csvpp.ParseArrayDelimiterWithDepth(tt.input, tt.depth)
if (err != nil) != tt.wantErr {
t.Errorf("parseArrayDelimiter() error = %v, wantErr %v", err, tt.wantErr)
t.Errorf("parseArrayDelimiterWithDepth() error = %v, wantErr %v", err, tt.wantErr)
return
}
if tt.wantErr {
return
}
if gotDelim != tt.wantDelim {
t.Errorf("parseArrayDelimiter() delim = %v, want %v", gotDelim, tt.wantDelim)
t.Errorf("parseArrayDelimiterWithDepth() delim = %v, want %v", gotDelim, tt.wantDelim)
}
if diff := cmp.Diff(tt.wantRest, gotRest); diff != "" {
t.Errorf("parseArrayDelimiter() rest mismatch (-want +got):\n%s", diff)
t.Errorf("parseArrayDelimiterWithDepth() rest mismatch (-want +got):\n%s", diff)
}
})
}
Expand Down
56 changes: 35 additions & 21 deletions reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,27 +377,41 @@ func TestSplitByRune(t *testing.T) {
func TestReader_NestedStructuredField(t *testing.T) {
t.Parallel()

// Test nested array in components
input := "name,data[](type^values[])\nAlice,home^1~2~work^3~4\n"
r := csvpp.NewReader(strings.NewReader(input))
t.Run("error: nested array with empty brackets", func(t *testing.T) {
t.Parallel()

got, err := r.Read()
if err != nil {
t.Fatalf("Reader.Read() error = %v", err)
}
// Per draft-02, nested arrays with empty brackets are invalid
input := "name,data[](type^values[])\nAlice,home^1~2~work^3~4\n"
r := csvpp.NewReader(strings.NewReader(input))

want := []*csvpp.Field{
{Value: "Alice"},
{Components: []*csvpp.Field{
{Components: []*csvpp.Field{{Value: "home"}, {Values: []string{"1"}}}},
{Components: []*csvpp.Field{{Value: "2"}, {Values: []string{}}}},
{Components: []*csvpp.Field{{Value: "work"}, {Values: []string{"3"}}}},
{Components: []*csvpp.Field{{Value: "4"}, {Values: []string{}}}},
}},
}
// Note: This test documents current behavior (may need adjustment based on spec)
_ = want
_ = got
_, err := r.Read()
if err == nil {
t.Error("Reader.Read() expected error for nested array with empty brackets")
}
})

t.Run("success: nested array with explicit delimiter", func(t *testing.T) {
t.Parallel()

input := "name,data[](type^values[;])\nAlice,home^1;2~work^3;4\n"
r := csvpp.NewReader(strings.NewReader(input))

got, err := r.Read()
if err != nil {
t.Fatalf("Reader.Read() error = %v", err)
}

want := []*csvpp.Field{
{Value: "Alice"},
{Components: []*csvpp.Field{
{Components: []*csvpp.Field{{Value: "home"}, {Values: []string{"1", "2"}}}},
{Components: []*csvpp.Field{{Value: "work"}, {Values: []string{"3", "4"}}}},
}},
}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("Reader.Read() mismatch (-want +got):\n%s", diff)
}
})
}

func TestReader_MismatchedFields(t *testing.T) {
Expand Down Expand Up @@ -450,8 +464,8 @@ func TestReader_MaxNestingDepth(t *testing.T) {
func TestReader_NestedComponents(t *testing.T) {
t.Parallel()

// Test nested structured field with array component
input := "name,data(type^values[])\nAlice,home^1~2~3\n"
// Test nested structured field with array component (explicit delimiter per draft-02)
input := "name,data(type^values[;])\nAlice,home^1;2;3\n"
r := csvpp.NewReader(strings.NewReader(input))

got, err := r.Read()
Expand Down