diff --git a/lib/csv/csv.go b/lib/csv/csv.go index a0c4927b..5d32b714 100644 --- a/lib/csv/csv.go +++ b/lib/csv/csv.go @@ -7,12 +7,12 @@ import ( "bytes" "encoding/csv" "fmt" - "strings" "sync" "github.com/1set/starlet/dataconv" tps "github.com/1set/starlet/dataconv/types" "github.com/1set/starlet/internal/replacecr" + "github.com/1set/starlet/lib/file" "go.starlark.net/starlark" "go.starlark.net/starlarkstruct" ) @@ -65,7 +65,8 @@ func readAll(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, } // prepare reader - csvr := csv.NewReader(replacecr.Reader(strings.NewReader(source.GoString()))) + rawStr := file.TrimUTF8BOM([]byte(source.GoString())) + csvr := csv.NewReader(replacecr.Reader(bytes.NewReader(rawStr))) csvr.LazyQuotes = lazyQuotes csvr.TrimLeadingSpace = trimLeadingSpace diff --git a/lib/csv/csv_test.go b/lib/csv/csv_test.go index 977ec9a1..ba75a528 100644 --- a/lib/csv/csv_test.go +++ b/lib/csv/csv_test.go @@ -305,6 +305,47 @@ x = write_dict([{"a": 200, "b": 100, "c": 500},{"b": 1024, "C": 2048}], header=[ assert.eq(x, "c,b\n500,100\n,1024\n") `), }, + { + name: `read_all: with UTF-8 BOM`, + script: itn.HereDoc(` +load('csv', 'read_all') +# UTF-8 BOM is represented by bytes EF BB BF at the beginning of the file +# In this test we use the hex representation as a string +csv_with_bom = b"\xef\xbb\xbfa,b,c\n1,2,3\n4,5,6" +assert.eq(read_all(csv_with_bom), [["a","b","c"],["1","2","3"],["4","5","6"]]) + `), + }, + { + name: `read_all: with UTF-8 BOM and different comma`, + script: itn.HereDoc(` +load('csv', 'read_all') +# UTF-8 BOM with semicolon as delimiter +csv_with_bom = b"\xef\xbb\xbfa;b;c\n1;2;3\n4;5;6" +assert.eq(read_all(csv_with_bom, comma=";"), [["a","b","c"],["1","2","3"],["4","5","6"]]) + `), + }, + { + name: `read_all: with UTF-8 BOM and comments`, + script: itn.HereDoc(` +load('csv', 'read_all') +# UTF-8 BOM with comments +csv_with_bom = b"\xef\xbb\xbfa,b,c\n#comment line\n1,2,3\n4,5,6" +assert.eq(read_all(csv_with_bom, comment="#"), [["a","b","c"],["1","2","3"],["4","5","6"]]) + `), + }, + { + name: `read_all: ensure BOM is properly removed`, + script: itn.HereDoc(` +load('csv', 'read_all') +# UTF-8 BOM should be removed properly and not affect the first field +# If not handled properly, the first character "a" would include the BOM bytes +csv_with_bom = b"\xef\xbb\xbfa,b,c\n1,2,3\n4,5,6" +result = read_all(csv_with_bom) +first_field = result[0][0] +assert.eq(first_field, "a") +assert.eq(len(first_field), 1) # Length should be 1, not 4 (3 BOM bytes + "a") + `), + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/lib/json/README.md b/lib/json/README.md index bcf1cb3b..c4d75f55 100644 --- a/lib/json/README.md +++ b/lib/json/README.md @@ -90,3 +90,195 @@ print(dumps({'a': 10, 'b': 20}, indent=2)) # "b": 20 # } ``` + +### `try_dumps(obj, indent=0) tuple` + +The try_dumps function is a variant of dumps that handles errors gracefully. +It accepts the same parameters as dumps, but returns a tuple of (result, error). +If successful, error will be None. If an error occurs, result will be None and error will contain the error message. + +#### Examples + +**Basic** + +Try to dump a Starlark dict to a JSON string and handle potential errors. + +```python +load('json', 'try_dumps') +result, error = try_dumps({'a': 10, 'b': 20}, indent=2) +print("Result:", result) +print("Error:", error) +# Output: +# Result: { +# "a": 10, +# "b": 20 +# } +# Error: None +``` + +### `try_encode(x) tuple` + +The try_encode function is a variant of encode that handles errors gracefully. +It accepts the same parameter as encode, but returns a tuple of (result, error). +If successful, error will be None. If an error occurs, result will be None and error will contain the error message. + +#### Examples + +**Basic** + +Try to encode a Starlark dict to a JSON string and handle potential errors. + +```python +load('json', 'try_encode') +result, error = try_encode({'a': 10, 'b': 20}) +print("Result:", result) +print("Error:", error) +# Output: +# Result: {"a":10,"b":20} +# Error: None +``` + +### `try_decode(x) tuple` + +The try_decode function is a variant of decode that handles errors gracefully. +It accepts the same parameter as decode, but returns a tuple of (result, error). +If successful, error will be None. If an error occurs, result will be None and error will contain the error message. + +#### Examples + +**Basic** + +Try to decode a JSON string to a Starlark dict and handle potential errors. + +```python +load('json', 'try_decode') +result, error = try_decode('{"a":10,"b":20}') +print("Result:", result) +print("Error:", error) +# Output: +# Result: {'a': 10, 'b': 20} +# Error: None +``` + +### `try_indent(str, prefix="", indent="\t") tuple` + +The try_indent function is a variant of indent that handles errors gracefully. +It accepts the same parameters as indent, but returns a tuple of (result, error). +If successful, error will be None. If an error occurs, result will be None and error will contain the error message. + +#### Examples + +**Basic** + +Try to indent a JSON string and handle potential errors. + +```python +load('json', 'try_indent') +result, error = try_indent('{"a":10,"b":20}', indent=" ") +print("Result:", result) +print("Error:", error) +# Output: +# Result: { +# "a": 10, +# "b": 20 +# } +# Error: None +``` + +### `path(data, path) list` + +The path function performs a JSONPath query on the given JSON data and returns the matching elements. +It accepts two positional arguments: +- data: JSON data as a string, bytes, or Starlark value (dict, list, etc.) +- path: A JSONPath expression string + It returns a list of matching elements. If no matches are found, an empty list is returned. + If the JSONPath expression is invalid, an error is raised. + +#### Examples + +**Basic** + +Query JSON data using JSONPath expressions. + +```python +load('json', 'path') +data = '''{"store":{"book":[{"title":"Moby Dick","price":8.99},{"title":"War and Peace","price":12.99}]}}''' +titles = path(data, '$.store.book[*].title') +print(titles) +# Output: ['Moby Dick', 'War and Peace'] +prices = path(data, '$..price') +print(prices) +# Output: [8.99, 12.99] +``` + +### `try_path(data, path) tuple` + +The try_path function is a variant of path that handles errors gracefully. +It accepts the same parameters as path, but returns a tuple of (result, error). +If successful, error will be None. If an error occurs, result will be None and error will contain the error message. + +#### Examples + +**Basic** + +Try to query JSON data using JSONPath and handle potential errors. + +```python +load('json', 'try_path') +data = '''{"store":{"book":[{"title":"Moby Dick","price":8.99},{"title":"War and Peace","price":12.99}]}}''' +result, error = try_path(data, '$..price') +print("Result:", result) +print("Error:", error) +# Output: +# Result: [8.99, 12.99] +# Error: None +``` + +### `eval(data, expr) value` + +The eval function evaluates a JSONPath expression on the given JSON data and returns the evaluation result. +It accepts two positional arguments: +- data: JSON data as a string, bytes, or Starlark value (dict, list, etc.) +- expr: A JSONPath expression string to evaluate + It returns the result of the evaluation, which can be a number, string, boolean, list, dict, or None. + If the expression is invalid, an error is raised. + +#### Examples + +**Basic** + +Evaluate JSONPath expressions on JSON data. + +```python +load('json', 'eval') +data = '''{"store":{"book":[{"price":8.99},{"price":12.99},{"price":5.99}]}}''' +avg_price = eval(data, 'avg($..price)') +print(avg_price) +# Output: 9.323333333333334 +sum_price = eval(data, 'sum($..price)') +print(sum_price) +# Output: 27.97 +``` + +### `try_eval(data, expr) tuple` + +The try_eval function is a variant of eval that handles errors gracefully. +It accepts the same parameters as eval, but returns a tuple of (result, error). +If successful, error will be None. If an error occurs, result will be None and error will contain the error message. + +#### Examples + +**Basic** + +Try to evaluate JSONPath expressions on JSON data and handle potential errors. + +```python +load('json', 'try_eval') +data = '''{"store":{"book":[{"price":8.99},{"price":12.99},{"price":5.99}]}}''' +result, error = try_eval(data, 'avg($..price)') +print("Result:", result) +print("Error:", error) +# Output: +# Result: 9.323333333333334 +# Error: None +``` diff --git a/lib/net/network_test.go b/lib/net/network_test.go index cd09b552..7ace63e2 100644 --- a/lib/net/network_test.go +++ b/lib/net/network_test.go @@ -84,8 +84,8 @@ func TestLoadModule_NSLookUp(t *testing.T) { load('net', 'nslookup') ips = nslookup('bing.com', 'microsoft.com', timeout=1) `), - wantErr: `i/o timeout`, - skipWindows: true, // on Windows 2022 with Go 1.18.10, it returns results from the default DNS server + wantErr: `timeout`, // Accept any error containing "timeout" + skipWindows: true, // on Windows 2022 with Go 1.18.10, it returns results from the default DNS server }, { name: `nslookup: no args`, diff --git a/lib/path/path_test.go b/lib/path/path_test.go index 6c7a808f..1b5a9f7c 100644 --- a/lib/path/path_test.go +++ b/lib/path/path_test.go @@ -375,7 +375,7 @@ func TestLoadModule_Path(t *testing.T) { load('path', 'listdir') def run(): if runtime_os == "darwin": - p = listdir('/var/root') + p = listdir('/var/db/sudo') elif runtime_os == "linux": p = listdir('/root') else: