diff --git a/README.md b/README.md index 28e581e..02745e1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ -This is the Bson implementation for Erlang. Bson is a record-like data type with a standard binary representation defined at <http://bsonspec.org>. This implements version 1.0 of that spec. The standard binary form allows for easy data interchange between systems. In particular, [MongoDB](http://www.mongodb.org) uses it for exchanging data between the MongoDB server and its clients. +This is the BSON implementation for Erlang. -The root Bson data type is `bson:document()`. Conceptually, it is a list of name-value pairs, analogous to an associative array, dictionary, or record. However, in this implementation, for writability and readability, the list of pairs is flattened, (ie. the tuples for each pair are elided), and the list is actually a tuple to distinguish it from list (array) of values. So a document is a tuple with alternating name and value elements, where a name is an `atom()` and a value is a `bson:value()`, which includes basic types like `boolean()`, `number()`, `atom()`, `bson:utf8()` (string), and compound types like `[bson:value()]` and `bson:document()`. For example, +BSON is a record-like data type with a standard binary representation defined at <http://bsonspec.org>. This implements version 1.0 of that spec. The standard binary form allows for easy data interchange between systems. In particular, [MongoDB](http://www.mongodb.org) uses it for exchanging data between the MongoDB server and its clients. + +The root BSON data type is `bson:document()`, a list of name-value pairs, analogous to an associative array, dictionary, or record. In this implementation, for writability and readability, the list of pairs is flattened (i.e. the tuples for each pair are elided), and the list is actually a tuple to distinguish it from a list (array) of values. 
Hence a document is a tuple with alternating name and value elements, where a name is an `atom()` and a value is a `bson:value()`, which includes basic types like `boolean()`, `number()`, `atom()`, `bson:utf8()` (string), and compound types like `[bson:value()]` and `bson:document()`. For example, > Doc = {x,<<"abc">>, y,[1,2,3], z,{a,'Foo', b,4.2}}. @@ -15,8 +17,8 @@ is a document with three fields: `{x,<<"abc">>}` and `{y,[1,2,3]}`, and `{z,{a,' > {x,<<"abc">>, y,[1,2,3], z,null, w,1} = bson:merge ({w,1, z,null}, Doc). > {w,1, x,<<"abc">>, y,[1,2,3], z,{a,'Foo', b,4.2}} = bson:append ({w,1}, Doc). -For the full list of `bson:value()` types see the [bson](http://github.com/TonyGen/bson-erlang/blob/master/src/bson.erl) module. Notice that an Erlang `string()` will be interpreted as a list of integers, so remember to alway delimit string literals with binary brackets (eg. `<<"abc">>`) and convert string variables using `bson:utf8`. You may be tempted to use atoms instead of strings, but you should only use atoms for enumerated types. +For the full list of `bson:value()` types see the [bson](http://github.com/mongodb/bson-erlang/blob/master/src/bson.erl) module. Notice that an Erlang `string()` will be interpreted as a list of integers, so remember to always delimit string literals with binary brackets (eg. `<<"abc">>`) and convert string variables using `bson:utf8`. You may be tempted to use atoms instead of strings, but you should only use atoms for enumerated types. -There are some special `bson:value()` types like `bson:javascript()` that are tagged tuples, eg. `{javascript, {x,1}, <<"function (y) {return y + x}">>}`. But embedded documents are also tuples, so how do we distinguish between the two? The answer is the `bson:value()` types that are tagged tuples are purposely defined to have an odd number of elements to distinguish them from documents which have an even number of elements. 
+There are some special `bson:value()` types like `bson:javascript()` that are tagged tuples, eg. `{javascript, {x,1}, <<"function (y) {return y + x}">>}`. But embedded documents are also tuples, so how do we distinguish between the two? Tagged tuple `bson:value()` values intentionally have an odd number of elements, to distinguish them from documents, which always have an even number of elements, as they store key-value pairs. -[API Docs](http://api.mongodb.org/erlang/bson/) - Documentation generated from source code comments +[API Docs](http://api.mongodb.org/erlang/bson/) - Documentation generated from source code comments. diff --git a/ebin/bson.app b/ebin/bson.app deleted file mode 100644 index 9fa5b43..0000000 --- a/ebin/bson.app +++ /dev/null @@ -1,7 +0,0 @@ -{application, bson, - [{description, "BSON are JSON-like objects with a standard binary serialization. See bsonspec.org."}, - {vsn, "0"}, - {modules, [bson, bson_binary, bson_tests]}, - {registered, []}, - {applications, [kernel, stdlib]} - ]}. diff --git a/src/bson.app.src b/src/bson.app.src new file mode 100644 index 0000000..31c0aeb --- /dev/null +++ b/src/bson.app.src @@ -0,0 +1,9 @@ +%% ex: ts=4 sw=4 noexpandtab syntax=erlang +{application, bson, + [ + {description, "BSON are JSON-like objects with a standard binary serialization. See bsonspec.org"}, + {vsn, "1"}, + {registered, []}, + {modules, []}, + {applications, [kernel, stdlib]} +]}. diff --git a/src/bson.erl b/src/bson.erl index 58c3d34..7d74816 100644 --- a/src/bson.erl +++ b/src/bson.erl @@ -3,13 +3,14 @@ -export_type ([maybe/1]). -export_type ([document/0, label/0, value/0]). +-export_type ([arr/0]). -export_type ([bin/0, bfunction/0, uuid/0, md5/0, userdefined/0]). -export_type ([mongostamp/0, minmaxkey/0]). --export_type ([regex/0, unixtime/0]). +-export_type ([utf8/0, regex/0, unixtime/0]). -export_type ([javascript/0]). -export_type ([objectid/0, unixsecs/0]). 
--export ([lookup/2, lookup/3, at/2, include/2, exclude/2, update/3, merge/2, append/2]). +-export ([lookup/2, lookup/3, at/2, include/2, exclude/2, update/3, merge/2, merge/3, append/2]). -export ([doc_foldl/3, doc_foldr/3, fields/1, document/1]). -export ([utf8/1, str/1]). -export ([timenow/0, ms_precision/1, secs_to_unixtime/1, unixtime_to_secs/1]). @@ -119,6 +120,13 @@ merge (UpDoc, BaseDoc) -> Fun = fun (Label, Value, Doc) -> update (Label, Value, Doc) end, doc_foldl (Fun, BaseDoc, UpDoc). +-spec merge (document(), document(), fun((label(), value(), value()) -> value())) -> document(). +merge(UpDoc, BaseDoc, Fun) -> + Dict1 = orddict:from_list(bson:fields(UpDoc)), + Dict2 = orddict:from_list(bson:fields(BaseDoc)), + bson:document(orddict:merge(Fun, Dict1, Dict2)). + + -spec append (document(), document()) -> document(). %@doc Append two documents together append (Doc1, Doc2) -> list_to_tuple (tuple_to_list (Doc1) ++ tuple_to_list (Doc2)). @@ -230,3 +238,40 @@ objectid (UnixSecs, MachineAndProcId, Count) -> -spec objectid_time (objectid()) -> unixtime(). %@doc Time when object id was generated objectid_time ({<>}) -> secs_to_unixtime (UnixSecs). + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). 
+ +bson_test() -> + Doc = {b, {x, 2, y, 3}, + a, 1, + c, [mon, tue, wed]}, + {1} = bson:lookup (a, Doc), + {} = bson:lookup (d, Doc), + 2 = bson:lookup (d, Doc, 2), + 1 = bson:lookup (a, Doc, 3), + 1 = bson:at (a, Doc), + {'EXIT', {missing_field, _}} = (catch bson:at (d, Doc)), + {a, 1} = bson:include ([a], Doc), + {a, 1} = bson:exclude ([b,c], Doc), + {b, {x, 2, y, 3}, a, 1, c, 4.2} = bson:update (c, 4.2, Doc), + {b, 0, a, 1, c, 2, d, 3} = bson:merge ({c, 2, d, 3, b, 0}, Doc), + {a, 1, b, {x, 2, y, 3}, c, 2, d, 3} = bson:merge ({c, 2, d, 3, b, 0}, Doc, fun + (b, _Value1, Value2) -> Value2; + (c, Value1, _Value2) -> Value1 + end), + {a, 1, b, 2, c, 3, d, 4} = bson:append ({a, 1, b, 2}, {c, 3, d, 4}), + [{b, {x, 2, y, 3}}, {a, 1}, {c, [mon, tue, wed]}] = bson:fields (Doc). + +time_test() -> + {MegaSecs, Secs, _} = bson:timenow(), + {MegaSecs, Secs, 0} = bson:secs_to_unixtime (bson:unixtime_to_secs ({MegaSecs, Secs, 0})). + +objectid_test() -> + {<<1:32/big, 2:24/big, 3:16/big, 4:24/big>>} = bson:objectid (1, <<2:24/big, 3:16/big>>, 4), + UnixSecs = bson:unixtime_to_secs (bson:timenow()), + UnixTime = bson:objectid_time (bson:objectid (UnixSecs, <<2:24/big, 3:16/big>>, 4)), + UnixSecs = bson:unixtime_to_secs (UnixTime). + +-endif. diff --git a/src/bson_binary.erl b/src/bson_binary.erl index d96fbcf..a5de084 100644 --- a/src/bson_binary.erl +++ b/src/bson_binary.erl @@ -16,6 +16,7 @@ put_field (Name, Value) -> case Value of false -> <>; true -> <>; null -> <>; + undefined -> <>; 'MIN_KEY' -> <>; 'MAX_KEY' -> <>; {Oid} -> <>; @@ -47,10 +48,11 @@ get_field (<>) -> 3 -> get_document (Bin1); 4 -> get_array (Bin1); 5 -> {BinType, Bin, Bin2} = get_binary (Bin1), {{bin, BinType, Bin}, Bin2}; + 6 -> {undefined, Bin1}; % Treat the deprecated "undefined" value as null, which we call 'undefined'! 
7 -> {Oid, Bin2} = get_oid (Bin1), {{Oid}, Bin2}; 8 -> <> = Bin1, {case Bit of 0 -> false; 1 -> true end, Bin2}; 9 -> get_unixtime (Bin1); - 10 -> {null, Bin1}; + 10 -> {undefined, Bin1}; 11 -> {Pat, Bin2} = get_cstring (Bin1), {Opt, Bin3} = get_cstring (Bin2), @@ -89,8 +91,10 @@ get_cstring (Bin) -> % list_to_tuple (binary:split (Bin, <<0>>)). put_document (Document) -> Bin = bson:doc_foldl (fun put_field_accum/3, <<>>, Document), <>. -put_field_accum (Label, Value, Bin) -> - <>. +put_field_accum (Label, Value, Bin) when is_atom(Label) -> + <>; +put_field_accum (Label, Value, Bin) when is_binary(Label) -> + <>. -spec get_document (binary()) -> {bson:document(), binary()}. get_document (<>) -> @@ -161,3 +165,41 @@ put_oid (<>) -> Oid. -spec get_oid (binary()) -> {<<_:96>>, binary()}. get_oid (<>) -> {Oid, Bin}. + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +binary_test() -> + Doc = {'BSON', [<<"awesome">>, 5.05, 1986]}, + Bin = bson_binary:put_document (Doc), + Bin = <<49,0,0,0,4,66,83,79,78,0,38,0,0,0,2,48,0,8,0,0,0,97,119,101,115,111,109,101,0,1,49,0,51,51,51,51,51,51,20,64,16,50,0,194,7,0,0,0,0>>, + VBin = <<200,12,240,129,100,90,56,198,34,0,0>>, + Time = bson:timenow(), + Doc1 = {a, -4.230845, + b, <<"hello">>, + c, {x, -1, y, 2.2001}, + d, [23, 45, 200], + eeeeeeeee, {bin, bin, VBin}, + f, {bin, function, VBin}, + g, {bin, uuid, Bin}, + h, {bin, md5, VBin}, + i, {bin, userdefined, Bin}, + j, bson:objectid (bson:unixtime_to_secs (Time), <<2:24/big, 3:16/big>>, 4), + k1, false, + k2, true, + l, Time, + m, undefined, + n, {regex, <<"foo">>, <<"bar">>}, + o1, {javascript, {}, <<"function(x) = x + 1;">>}, + o2, {javascript, {x, 0, y, <<"foo">>}, <<"function(a) = a + x">>}, + p, atom, + q1, -2000444000, + q2, -8000111000222001, + r, {mongostamp, 100022, 995332003}, + s1, 'MIN_KEY', + s2, 'MAX_KEY'}, + Bin1 = bson_binary:put_document (Doc1), + {Doc1, <<>>} = bson_binary:get_document (Bin1). + +-endif. 
diff --git a/src/bson_schema.erl b/src/bson_schema.erl new file mode 100644 index 0000000..f4ca092 --- /dev/null +++ b/src/bson_schema.erl @@ -0,0 +1,108 @@ +-module (bson_schema). + +-export ([validate/2, validate_value/2]). + + +-spec validate(bson:document(), bson:document()) -> bson:document(). +validate(Document, Spec) -> + bson:doc_foldl(fun(Key, KeySpec, Acc) -> + case bson:lookup(Key, Document) of + {Value} -> + bson:update(Key, validate_value(Value, KeySpec), Acc); + {} -> + case lists:member(required, KeySpec) of + true -> error(badarg, [Document, Spec]); + false -> Acc + end + end + end, {}, Spec). + + +-spec validate_value(term(), list()) -> term(). +validate_value(Value, []) -> + Value; + +validate_value(Value, [required | Rest]) -> + validate_value(Value, Rest); + +validate_value({<<_:96>>} = Value, [object_id | Rest]) -> + validate_value(Value, Rest); +validate_value(Value, [object_id | _] = Spec) -> + error(badarg, [Value, Spec]); + +validate_value(Value, [{atom, Values} | Rest] = Spec) -> + try lists:foreach(fun(V) -> + case V == Value orelse atom_to_binary(V, unicode) == Value of + true -> throw(V); + false -> false + end + end, Values) of + _ -> error(badarg, [Value, Spec]) + catch + throw:V -> validate_value(V, Rest) + end; + +validate_value(Value, [utf8 | Rest] = Spec) -> + case unicode:characters_to_binary(Value) of + {error, _, _} -> error(badarg, [Value, Spec]); + {incomplete, _, _} -> error(badarg, [Value, Spec]); + Data -> validate_value(Data, Rest) + end; + +validate_value(Value, [{length, Min, Max} | Rest] = Spec) when is_list(Value) -> + case within(length(Value), Min, Max) of + true -> validate_value(Value, Rest); + false -> error(badarg, [Value, Spec]) + end; +validate_value(Value, [{length, Min, Max} | Rest] = Spec) when is_binary(Value) -> + case within(byte_size(Value), Min, Max) of + true -> validate_value(Value, Rest); + false -> error(badarg, [Value, Spec]) + end; +validate_value(Value, [{length, _Min, _Max} | _] = Spec) -> + 
error(badarg, [Value, Spec]); + +validate_value(Value, [{float, Min, Max} | Rest] = Spec) when is_float(Value) -> + case within(Value, Min, Max) of + true -> validate_value(Value, Rest); + false -> error(badarg, [Value, Spec]) + end; +validate_value(Value, [{float, _Min, _Max} | _] = Spec) -> + error(badarg, [Value, Spec]); + +validate_value(Value, [{integer, Min, Max} | Rest] = Spec) when is_integer(Value) -> + case within(Value, Min, Max) of + true -> validate_value(Value, Rest); + false -> error(badarg, [Value, Spec]) + end; +validate_value(Value, [{integer, _Min, _Max} | _] = Spec) -> + error(badarg, [Value, Spec]); + +validate_value({A, B, C} = Value, [timestamp | Rest]) when is_integer(A), is_integer(B), is_integer(C) -> + validate_value(Value, Rest); +validate_value(Value, [timestamp | _] = Spec) -> + error(badarg, [Value, Spec]); + +validate_value(Value, [{list, Subspec} | Rest]) when is_list(Value) -> + validate_value([validate_value(V, Subspec) || V <- Value], Rest); +validate_value(Value, [{list, _Subspec} | _] = Spec) -> + error(badarg, [Value, Spec]); + +validate_value(Value, [{object, Subspec} | Rest]) when is_tuple(Value) -> + validate_value(validate(Value, Subspec), Rest); +validate_value(Value, [{object, _Subspec} | _] = Spec) -> + error(badarg, [Value, Spec]). + + +% @private +within(Value, '-infinity', Max) -> + Value =< Max; +within(Value, Min, Max) -> + Value >= Min andalso Value =< Max. + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + + +-endif. diff --git a/src/bson_tests.erl b/src/bson_tests.erl deleted file mode 100644 index 6118572..0000000 --- a/src/bson_tests.erl +++ /dev/null @@ -1,62 +0,0 @@ --module(bson_tests). - --include_lib("eunit/include/eunit.hrl"). 
- -bson_test() -> - Doc = {b, {x, 2, y, 3}, - a, 1, - c, [mon, tue, wed]}, - {1} = bson:lookup (a, Doc), - {} = bson:lookup (d, Doc), - 2 = bson:lookup (d, Doc, 2), - 1 = bson:lookup (a, Doc, 3), - 1 = bson:at (a, Doc), - {'EXIT', {missing_field, _}} = (catch bson:at (d, Doc)), - {a, 1} = bson:include ([a], Doc), - {a, 1} = bson:exclude ([b,c], Doc), - {b, {x, 2, y, 3}, a, 1, c, 4.2} = bson:update (c, 4.2, Doc), - {b, 0, a, 1, c, 2, d, 3} = bson:merge ({c, 2, d, 3, b, 0}, Doc), - {a, 1, b, 2, c, 3, d, 4} = bson:append ({a, 1, b, 2}, {c, 3, d, 4}), - [{b, {x, 2, y, 3}}, {a, 1}, {c, [mon, tue, wed]}] = bson:fields (Doc). - -time_test() -> - {MegaSecs, Secs, _} = bson:timenow(), - {MegaSecs, Secs, 0} = bson:secs_to_unixtime (bson:unixtime_to_secs ({MegaSecs, Secs, 0})). - -objectid_test() -> - {<<1:32/big, 2:24/big, 3:16/big, 4:24/big>>} = bson:objectid (1, <<2:24/big, 3:16/big>>, 4), - UnixSecs = bson:unixtime_to_secs (bson:timenow()), - UnixTime = bson:objectid_time (bson:objectid (UnixSecs, <<2:24/big, 3:16/big>>, 4)), - UnixSecs = bson:unixtime_to_secs (UnixTime). 
- -binary_test() -> - Doc = {'BSON', [<<"awesome">>, 5.05, 1986]}, - Bin = bson_binary:put_document (Doc), - Bin = <<49,0,0,0,4,66,83,79,78,0,38,0,0,0,2,48,0,8,0,0,0,97,119,101,115,111,109,101,0,1,49,0,51,51,51,51,51,51,20,64,16,50,0,194,7,0,0,0,0>>, - VBin = <<200,12,240,129,100,90,56,198,34,0,0>>, - Time = bson:timenow(), - Doc1 = {a, -4.230845, - b, <<"hello">>, - c, {x, -1, y, 2.2001}, - d, [23, 45, 200], - eeeeeeeee, {bin, bin, VBin}, - f, {bin, function, VBin}, - g, {bin, uuid, Bin}, - h, {bin, md5, VBin}, - i, {bin, userdefined, Bin}, - j, bson:objectid (bson:unixtime_to_secs (Time), <<2:24/big, 3:16/big>>, 4), - k1, false, - k2, true, - l, Time, - m, null, - n, {regex, <<"foo">>, <<"bar">>}, - o1, {javascript, {}, <<"function(x) = x + 1;">>}, - o2, {javascript, {x, 0, y, <<"foo">>}, <<"function(a) = a + x">>}, - p, atom, - q1, -2000444000, - q2, -8000111000222001, - r, {mongostamp, 100022, 995332003}, - s1, 'MIN_KEY', - s2, 'MAX_KEY'}, - Bin1 = bson_binary:put_document (Doc1), - {Doc1, <<>>} = bson_binary:get_document (Bin1).