Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions lib/csv/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ import (
"bytes"
"encoding/csv"
"fmt"
"strings"
"sync"

"github.com/1set/starlet/dataconv"
tps "github.com/1set/starlet/dataconv/types"
"github.com/1set/starlet/internal/replacecr"
"github.com/1set/starlet/lib/file"
"go.starlark.net/starlark"
"go.starlark.net/starlarkstruct"
)
Expand Down Expand Up @@ -65,7 +65,8 @@ func readAll(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple,
}

// prepare reader
csvr := csv.NewReader(replacecr.Reader(strings.NewReader(source.GoString())))
rawStr := file.TrimUTF8BOM([]byte(source.GoString()))
csvr := csv.NewReader(replacecr.Reader(bytes.NewReader(rawStr)))
csvr.LazyQuotes = lazyQuotes
csvr.TrimLeadingSpace = trimLeadingSpace

Expand Down
41 changes: 41 additions & 0 deletions lib/csv/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,47 @@ x = write_dict([{"a": 200, "b": 100, "c": 500},{"b": 1024, "C": 2048}], header=[
assert.eq(x, "c,b\n500,100\n,1024\n")
`),
},
{
name: `read_all: with UTF-8 BOM`,
script: itn.HereDoc(`
load('csv', 'read_all')
# UTF-8 BOM is represented by bytes EF BB BF at the beginning of the file
# In this test we use the hex representation as a string
csv_with_bom = b"\xef\xbb\xbfa,b,c\n1,2,3\n4,5,6"
assert.eq(read_all(csv_with_bom), [["a","b","c"],["1","2","3"],["4","5","6"]])
`),
},
{
name: `read_all: with UTF-8 BOM and different comma`,
script: itn.HereDoc(`
load('csv', 'read_all')
# UTF-8 BOM with semicolon as delimiter
csv_with_bom = b"\xef\xbb\xbfa;b;c\n1;2;3\n4;5;6"
assert.eq(read_all(csv_with_bom, comma=";"), [["a","b","c"],["1","2","3"],["4","5","6"]])
`),
},
{
name: `read_all: with UTF-8 BOM and comments`,
script: itn.HereDoc(`
load('csv', 'read_all')
# UTF-8 BOM with comments
csv_with_bom = b"\xef\xbb\xbfa,b,c\n#comment line\n1,2,3\n4,5,6"
assert.eq(read_all(csv_with_bom, comment="#"), [["a","b","c"],["1","2","3"],["4","5","6"]])
`),
},
{
name: `read_all: ensure BOM is properly removed`,
script: itn.HereDoc(`
load('csv', 'read_all')
# UTF-8 BOM should be removed properly and not affect the first field
# If not handled properly, the first character "a" would include the BOM bytes
csv_with_bom = b"\xef\xbb\xbfa,b,c\n1,2,3\n4,5,6"
result = read_all(csv_with_bom)
first_field = result[0][0]
assert.eq(first_field, "a")
assert.eq(len(first_field), 1) # Length should be 1, not 4 (3 BOM bytes + "a")
`),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
192 changes: 192 additions & 0 deletions lib/json/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,195 @@ print(dumps({'a': 10, 'b': 20}, indent=2))
# "b": 20
# }
```

### `try_dumps(obj, indent=0) tuple`

The try_dumps function is a variant of dumps that handles errors gracefully.
It accepts the same parameters as dumps, but returns a tuple of (result, error).
If successful, error will be None. If an error occurs, result will be None and error will contain the error message.

#### Examples

**Basic**

Try to dump a Starlark dict to a JSON string and handle potential errors.

```python
load('json', 'try_dumps')
result, error = try_dumps({'a': 10, 'b': 20}, indent=2)
print("Result:", result)
print("Error:", error)
# Output:
# Result: {
# "a": 10,
# "b": 20
# }
# Error: None
```

### `try_encode(x) tuple`

The try_encode function is a variant of encode that handles errors gracefully.
It accepts the same parameter as encode, but returns a tuple of (result, error).
If successful, error will be None. If an error occurs, result will be None and error will contain the error message.

#### Examples

**Basic**

Try to encode a Starlark dict to a JSON string and handle potential errors.

```python
load('json', 'try_encode')
result, error = try_encode({'a': 10, 'b': 20})
print("Result:", result)
print("Error:", error)
# Output:
# Result: {"a":10,"b":20}
# Error: None
```

### `try_decode(x) tuple`

The try_decode function is a variant of decode that handles errors gracefully.
It accepts the same parameter as decode, but returns a tuple of (result, error).
If successful, error will be None. If an error occurs, result will be None and error will contain the error message.

#### Examples

**Basic**

Try to decode a JSON string to a Starlark dict and handle potential errors.

```python
load('json', 'try_decode')
result, error = try_decode('{"a":10,"b":20}')
print("Result:", result)
print("Error:", error)
# Output:
# Result: {'a': 10, 'b': 20}
# Error: None
```

### `try_indent(str, prefix="", indent="\t") tuple`

The try_indent function is a variant of indent that handles errors gracefully.
It accepts the same parameters as indent, but returns a tuple of (result, error).
If successful, error will be None. If an error occurs, result will be None and error will contain the error message.

#### Examples

**Basic**

Try to indent a JSON string and handle potential errors.

```python
load('json', 'try_indent')
result, error = try_indent('{"a":10,"b":20}', indent=" ")
print("Result:", result)
print("Error:", error)
# Output:
# Result: {
# "a": 10,
# "b": 20
# }
# Error: None
```

### `path(data, path) list`

The path function performs a JSONPath query on the given JSON data and returns the matching elements.
It accepts two positional arguments:

- data: JSON data as a string, bytes, or Starlark value (dict, list, etc.)
- path: A JSONPath expression string

It returns a list of matching elements. If no matches are found, an empty list is returned.
If the JSONPath expression is invalid, an error is raised.

#### Examples

**Basic**

Query JSON data using JSONPath expressions.

```python
load('json', 'path')
data = '''{"store":{"book":[{"title":"Moby Dick","price":8.99},{"title":"War and Peace","price":12.99}]}}'''
titles = path(data, '$.store.book[*].title')
print(titles)
# Output: ['Moby Dick', 'War and Peace']
prices = path(data, '$..price')
print(prices)
# Output: [8.99, 12.99]
```

### `try_path(data, path) tuple`

The try_path function is a variant of path that handles errors gracefully.
It accepts the same parameters as path, but returns a tuple of (result, error).
If successful, error will be None. If an error occurs, result will be None and error will contain the error message.

#### Examples

**Basic**

Try to query JSON data using JSONPath and handle potential errors.

```python
load('json', 'try_path')
data = '''{"store":{"book":[{"title":"Moby Dick","price":8.99},{"title":"War and Peace","price":12.99}]}}'''
result, error = try_path(data, '$..price')
print("Result:", result)
print("Error:", error)
# Output:
# Result: [8.99, 12.99]
# Error: None
```

### `eval(data, expr) value`

The eval function evaluates a JSONPath expression on the given JSON data and returns the evaluation result.
It accepts two positional arguments:

- data: JSON data as a string, bytes, or Starlark value (dict, list, etc.)
- expr: A JSONPath expression string to evaluate

It returns the result of the evaluation, which can be a number, string, boolean, list, dict, or None.
If the expression is invalid, an error is raised.

#### Examples

**Basic**

Evaluate JSONPath expressions on JSON data.

```python
load('json', 'eval')
data = '''{"store":{"book":[{"price":8.99},{"price":12.99},{"price":5.99}]}}'''
avg_price = eval(data, 'avg($..price)')
print(avg_price)
# Output: 9.323333333333334
sum_price = eval(data, 'sum($..price)')
print(sum_price)
# Output: 27.97
```

### `try_eval(data, expr) tuple`

The try_eval function is a variant of eval that handles errors gracefully.
It accepts the same parameters as eval, but returns a tuple of (result, error).
If successful, error will be None. If an error occurs, result will be None and error will contain the error message.

#### Examples

**Basic**

Try to evaluate JSONPath expressions on JSON data and handle potential errors.

```python
load('json', 'try_eval')
data = '''{"store":{"book":[{"price":8.99},{"price":12.99},{"price":5.99}]}}'''
result, error = try_eval(data, 'avg($..price)')
print("Result:", result)
print("Error:", error)
# Output:
# Result: 9.323333333333334
# Error: None
```
4 changes: 2 additions & 2 deletions lib/net/network_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ func TestLoadModule_NSLookUp(t *testing.T) {
load('net', 'nslookup')
ips = nslookup('bing.com', 'microsoft.com', timeout=1)
`),
wantErr: `i/o timeout`,
skipWindows: true, // on Windows 2022 with Go 1.18.10, it returns results from the default DNS server
wantErr: `timeout`, // Accept any error containing "timeout"
skipWindows: true, // on Windows 2022 with Go 1.18.10, it returns results from the default DNS server
},
{
name: `nslookup: no args`,
Expand Down
2 changes: 1 addition & 1 deletion lib/path/path_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ func TestLoadModule_Path(t *testing.T) {
load('path', 'listdir')
def run():
if runtime_os == "darwin":
p = listdir('/var/root')
p = listdir('/var/db/sudo')
elif runtime_os == "linux":
p = listdir('/root')
else:
Expand Down
Loading