From d73f86711ca99c671e92072c13d150dd309a3fa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Wed, 17 Apr 2024 18:00:12 -0300 Subject: [PATCH 01/25] feat: cleanup --- src/bel_scan.erl | 492 --------------------------------------- test/bel_scan_SUITE.erl | 151 ------------ test/support/my_scan.erl | 73 ------ 3 files changed, 716 deletions(-) delete mode 100644 test/bel_scan_SUITE.erl delete mode 100644 test/support/my_scan.erl diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 2b92fcb..7baf3ee 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -22,508 +22,16 @@ -module(bel_scan). -compile(inline_list_funcs). -% API --export([ new/1 - , string/2 - , continue/2 - , skip_new_lns/2 - , terminate/1 - , new_ln/1 - , incr_col/1 - , incr_col/2 - , snapshot/1 - , update_pos/1 - , pos_text/1 - , anno/1 - , anno/2 - , anno/3 - , token/2 - , token/3 - , push_token/2 - , push_tokens/2 - , fold/2 - ]). - -% State get/set --export([ get/2 - , set/3 - , get_input/1 - , set_input/2 - , get_handler/1 - , set_handler/2 - , get_handler_state/1 - , set_handler_state/2 - , get_tokens/1 - , set_tokens/2 - , get_ln/1 - , set_ln/2 - , get_col/1 - , set_col/2 - , get_loc/1 - , set_loc/2 - , get_snap_loc/1 - , set_snap_loc/2 - , get_buffer_pos/1 - , set_buffer_pos/2 - , get_pos/1 - , set_pos/2 - , get_len/1 - , set_len/2 - , get_source/1 - , set_source/2 - ]). - --export_type([ t/0 - , input/0 - , rest/0 - , handler/0 - , handler_opts/0 - , handler_state/0 - , tag/0 - , metadata/0 - , anno/0 - , value/0 - , token/0 - , line/0 - , column/0 - , location/0 - , position/0 - , length/0 - , result/0 - ]). - -% Callbacks - --callback init(handler_opts()) -> {ok, handler_state()}. - --callback handle_char(char(), rest(), t()) -> t(). - --callback handle_tokens([token()], t()) -> result(). - -% Libs - -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -endif. -% Macros - --define(is_ln(X), ( - is_integer(X) andalso X >= 1 -)). 
- --define(is_col(X), ( - is_integer(X) andalso X >= 1 -)). - --define(is_loc(X), ( - is_tuple(X) - andalso tuple_size(X) =:= 2 - andalso ?is_ln(element(1, X)) - andalso ?is_col(element(2, X)) -)). - --define(is_position(X), ( - is_integer(X) andalso X >= 0 -)). - --define(is_length(X), ( - is_integer(X) andalso X >= 0 -)). - --define(is_filename(X), ( - is_list(X) orelse is_binary(X) -)). - --define(is_source(X), ( - X =:= undefined - orelse ( - is_tuple(X) - andalso tuple_size(X) =:= 2 - andalso ( - ( - element(1, X) =:= file - andalso ?is_filename(element(2, X)) - ) - orelse ( - element(1, X) =:= module - andalso is_atom(element(2, X)) - ) - ) - ) -)). - --define(valid_params(Input, Handler), ( - is_binary(Input) andalso is_atom(Handler) -)). - --define(is_anno(X), ( - is_tuple(X) - andalso tuple_size(X) =:= 3 - andalso ?is_loc(element(1, X)) - andalso ( - element(2, X) =:= undefined - orelse ?is_filename(element(2, X)) - ) -)). - --define(is_token(X), ( - is_tuple(X) - andalso tuple_size(X) =:= 3 - andalso ?is_anno(element(2, X)) -)). - --define(DEFAULTS, #{ - handler_state => undefined, - tokens => [], - ln => 1, - col => 1, - snap_loc => {1, 1}, - buffer_pos => 0, - pos => 0, - len => 0, - source => undefined -}). - -% Types - --record(state, { input :: input() - , handler :: handler() - , handler_state :: handler_state() - , tokens :: [token()] - , ln :: line() - , col :: column() - , snap_loc :: location() - , buffer_pos :: position() - , pos :: position() - , len :: length() - , source :: source() - }). - --opaque t() :: #state{}. --type input() :: binary(). --type rest() :: bitstring(). --type handler() :: module(). --type handler_opts() :: term(). --type handler_state() :: term(). --type line() :: pos_integer(). --type column() :: pos_integer(). --type location() :: {line(), column()}. --type position() :: non_neg_integer(). --type length() :: non_neg_integer(). --type tag() :: term(). --type metadata() :: term(). 
--type filename() :: file:filename_all() | undefined. --type anno() :: {location(), source(), metadata()}. --type value() :: term(). --type token() :: {tag(), anno(), value()}. --type source() :: {file, filename()} - | {module, module()} - | undefined - . --type result() :: term(). - -%%%===================================================================== -%%% API -%%%===================================================================== - -% Fixes no return warning because of the false positive of the #state{}. --dialyzer({nowarn_function, [new/1]}). - -new(#{input := I, handler := H} = Params) when ?valid_params(I, H) -> - maps:fold(fun set/3, #state{}, maps:merge(?DEFAULTS, Params)). - -string(Opts, #state{} = State) -> - Handler = State#state.handler, - {ok, HandlerState} = Handler:init(Opts), - continue(State#state.input, State#state{handler_state = HandlerState}). - -continue(<>, State0) -> - case skip_new_lns(Rest0, State0) of - {ok, {Char, Rest, #state{handler = Handler} = State}} -> - Handler:handle_char(Char, Rest, State); - {eof, State} -> - terminate(State) - end; -continue(<<>>, #state{} = State) -> - terminate(State). - -skip_new_lns(<<$\r, $\n, Rest/bitstring>>, State) -> - skip_new_lns(Rest, new_ln(incr_col(State))); -skip_new_lns(<<$\r, Rest/bitstring>>, State) -> - skip_new_lns(Rest, new_ln(incr_col(State))); -skip_new_lns(<<$\n, Rest/bitstring>>, State) -> - skip_new_lns(Rest, new_ln(incr_col(State))); -skip_new_lns(<<$\f, Rest/bitstring>>, State) -> - skip_new_lns(Rest, new_ln(incr_col(State))); -skip_new_lns(<>, State) -> - {ok, {Char, Rest, State}}; -skip_new_lns(<<>>, State) -> - {eof, State}. - -terminate(#state{handler = Handler} = State) -> - Handler:handle_tokens(State#state.tokens, State). - -new_ln(#state{} = State) -> - State#state{ - ln = State#state.ln+1, - col = 1 - }. - -incr_col(#state{} = State) -> - incr_col(1, State). 
- -incr_col(N, #state{} = State) when ?is_col(N) -> - State#state{ - col = State#state.col + N, - buffer_pos = State#state.buffer_pos + N, - len = State#state.len + N - }. - -snapshot(#state{} = State) -> - State#state{ - snap_loc = {State#state.ln, State#state.col} - }. - -update_pos(#state{} = State) -> - State#state{ - pos = State#state.buffer_pos, - len = 0 - }. - -pos_text(#state{} = State) -> - binary_part(State#state.input, State#state.pos, State#state.len). - -anno(State) -> - anno(undefined, State). - -anno(Metadata, #state{} = State) -> - anno(get_snap_loc(State), get_source(State), Metadata). - -anno(Location, Source, Metadata) when ?is_loc(Location), ?is_source(Source) -> - {Location, Source, Metadata}. - -token(Tag, #state{} = State) -> - {Tag, anno(State), pos_text(State)}. - -token(Tag, Metadata, #state{} = State) -> - {Tag, anno(State), Metadata}; -token(Tag, Anno, Metadata) when ?is_anno(Anno) -> - {Tag, Anno, Metadata}. - -push_token(Token, #state{} = State) when ?is_token(Token) -> - State#state{tokens = [Token | State#state.tokens]}. - -push_tokens(Tokens, #state{} = State) when is_list(Tokens) -> - lists:foldl(fun push_token/2, State, Tokens). - -fold(#state{} = State, Funs) when is_list(Funs) -> - lists:foldl(fun(Fun, Acc) when is_function(Fun, 1) -> - Fun(Acc) - end, State, Funs). 
- -%%%===================================================================== -%%% State get/set -%%%===================================================================== - -get(input, State) -> - get_input(State); -get(handler, State) -> - get_handler(State); -get(handler_state, State) -> - get_handler_state(State); -get(tokens, State) -> - get_tokens(State); -get(ln, State) -> - get_ln(State); -get(col, State) -> - get_col(State); -get(loc, State) -> - get_loc(State); -get(snap_loc, State) -> - get_snap_loc(State); -get(buffer_pos, State) -> - get_buffer_pos(State); -get(pos, State) -> - get_pos(State); -get(len, State) -> - get_len(State); -get(source, State) -> - get_source(State). - -set(input, Value, State) -> - set_input(Value, State); -set(handler, Value, State) -> - set_handler(Value, State); -set(handler_state, Value, State) -> - set_handler_state(Value, State); -set(tokens, Value, State) -> - set_tokens(Value, State); -set(ln, Value, State) -> - set_ln(Value, State); -set(col, Value, State) -> - set_col(Value, State); -set(loc, Value, State) -> - set_loc(Value, State); -set(snap_loc, Value, State) -> - set_snap_loc(Value, State); -set(buffer_pos, Value, State) -> - set_buffer_pos(Value, State); -set(pos, Value, State) -> - set_pos(Value, State); -set(len, Value, State) -> - set_len(Value, State); -set(source, Value, State) -> - set_source(Value, State). - -get_input(#state{input = Input}) -> - Input. - -set_input(Input, #state{} = State) when is_binary(Input) -> - State#state{input = Input}. - -get_handler(#state{handler = Handler}) -> - Handler. - -set_handler(Handler, #state{} = State) when is_atom(Handler) -> - State#state{handler = Handler}. - -get_handler_state(#state{handler_state = HandlerState}) -> - HandlerState. - -set_handler_state(HandlerState, #state{} = State) -> - State#state{handler_state = HandlerState}. - -get_tokens(#state{tokens = Tokens}) -> - Tokens. 
- -set_tokens(Tokens, #state{} = State) when is_list(Tokens) -> - State#state{tokens = Tokens}. - -get_ln(#state{ln = Ln}) -> - Ln. - -set_ln(Ln, #state{} = State) when ?is_ln(Ln) -> - State#state{ln = Ln}. - -get_col(#state{col = Col}) -> - Col. - -set_col(Col, #state{} = State) when ?is_col(Col) -> - State#state{col = Col}. - -get_loc(#state{ln = Ln, col = Col}) -> - {Ln, Col}. - -set_loc({Ln, Col}, State) when ?is_ln(Ln), ?is_col(Col) -> - State#state{ - ln = Ln, - col = Col - }. - -get_snap_loc(#state{snap_loc = Col}) -> - Col. - -set_snap_loc({Ln, Col}, #state{} = State) when ?is_ln(Ln), ?is_col(Col) -> - State#state{snap_loc = {Ln, Col}}. - -get_buffer_pos(#state{buffer_pos = BufferPos}) -> - BufferPos. - -set_buffer_pos(BufferPos, #state{} = State) when ?is_position(BufferPos) -> - State#state{buffer_pos = BufferPos}. - -get_pos(#state{pos = Pos}) -> - Pos. - -set_pos(Pos, #state{} = State) when ?is_position(Pos) -> - State#state{pos = Pos}. - -get_len(#state{len = Len}) -> - Len. - -set_len(Len, #state{} = State) when ?is_length(Len) -> - State#state{len = Len}. - -get_source(#state{source = Source}) -> - Source. - -set_source(Source, #state{} = State) when ?is_source(Source) -> - State#state{source = Source}. - -%%%===================================================================== -%%% Internal functions -%%%===================================================================== - -% nothing here yet! - %%%===================================================================== %%% Tests -%%% TODO: All kind of missing tests. -%%% TODO: Move tests to "../test/bel_scan_SUITE.erl". %%%===================================================================== -ifdef(TEST). -compile([export_all, nowarn_export_all]). -% Callbacks - -init([]) -> - {ok, []}. - -handle_char(_Char, Rest, State) -> - continue(Rest, snapshot(incr_col(State))). - -handle_tokens(_Tokens, State) -> - State. - -% Support - -params(Input) -> - #{input => Input, handler => ?MODULE}. 
- -% Runners - -new_test() -> - [ { "Should raise 'function_clause' when wrong params" - , ?assertError(function_clause, new(#{}))} - , { "Should return a valid state" - , ?assertEqual(#state{ - input = <<>>, - handler = ?MODULE, - handler_state = maps:get(handler_state, ?DEFAULTS), - tokens = maps:get(tokens, ?DEFAULTS), - ln = maps:get(ln, ?DEFAULTS), - col = maps:get(col, ?DEFAULTS), - snap_loc = maps:get(snap_loc, ?DEFAULTS), - buffer_pos = maps:get(buffer_pos, ?DEFAULTS), - pos = maps:get(pos, ?DEFAULTS), - len = maps:get(len, ?DEFAULTS), - source = maps:get(source, ?DEFAULTS) - }, new(params(<<>>)))} - ]. - -string_test() -> - Input = <<"foo\nbar">>, - State = string([], new(params(Input))), - [ { "Should scan and return the tokens" - , ?assertEqual([], get_tokens(State))} - , { "Should return correct ln" - , ?assertEqual(2, get_ln(State))} - , { "Should return correct col" - , ?assertEqual(4, get_col(State))} - , { "Should return correct loc" - , ?assertEqual({2, 4}, get_loc(State))} - , { "Should return correct snap_loc" - , ?assertEqual({2, 4}, get_snap_loc(State))} - , { "Should return correct buffer_pos" - , ?assertEqual(7, get_buffer_pos(State))} - , { "Should return correct pos" - , ?assertEqual(0, get_pos(State))} - , { "Should return correct len" - , ?assertEqual(7, get_len(State))} - , { "Should return correct pos_text" - , ?assertEqual(Input, pos_text(State))} - ]. -endif. diff --git a/test/bel_scan_SUITE.erl b/test/bel_scan_SUITE.erl deleted file mode 100644 index 417db70..0000000 --- a/test/bel_scan_SUITE.erl +++ /dev/null @@ -1,151 +0,0 @@ -%%%--------------------------------------------------------------------- -%%% @copyright 2024 William Fank Thomé -%%% @author William Fank Thomé -%%% @doc bel_scan tests. -%%% -%%% Copyright 2024 William Fank Thomé -%%% -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. 
-%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% @end -%%%--------------------------------------------------------------------- --module(bel_scan_SUITE). - -% -include_lib("common_test/include/ct.hrl"). - -%% Callback functions --export([ suite/0 - , all/0 - , init_per_suite/1 - , end_per_suite/1 - , init_per_testcase/2 - , end_per_testcase/2 - ]). - -%% Test cases --export([ readme_example/1 - % TODO: bel_scan tests - ]). - -%%%===================================================================== -%%% Callback functions -%%%===================================================================== - -%%---------------------------------------------------------------------- -%% @doc Returns list of tuples to set default properties for the suite. -%% -%% @param Info List of key/value pairs. -%% -%% @end -%%---------------------------------------------------------------------- --spec suite() -> Info when - Info :: [tuple()]. - -suite() -> - []. - -%%---------------------------------------------------------------------- -%% @doc Initialization before the suite. -%% -%% @param Config A list of key/value pairs, holding the test case configuration. -%% -%% @end -%%---------------------------------------------------------------------- --spec init_per_suite(Config0) -> Config when - Config0 :: [tuple()], - Config :: [tuple()]. - -init_per_suite(Config) -> - Config. - -%%---------------------------------------------------------------------- -%% @doc Cleanup after the suite. -%% -%% @param Config A list of key/value pairs, holding the test case configuration. 
-%% -%% @end -%%---------------------------------------------------------------------- --spec end_per_suite(Config) -> Result when - Config :: [tuple()], - Result :: term(). - -end_per_suite(_Config) -> - ok. - -%%---------------------------------------------------------------------- -%% @doc Initialization before each test case. -%% -%% @param TestCase Name of the test case that is about to run. -%% @param Config A list of key/value pairs, holding the test case configuration. -%% -%% @end -%%---------------------------------------------------------------------- --spec init_per_testcase(TestCase, Config0) -> Config when - TestCase :: atom(), - Config0 :: [tuple()], - Config :: [tuple()]. - -init_per_testcase(_TestCase, Config) -> - Config. - -%%---------------------------------------------------------------------- -%% @doc Cleanup after each test case. -%% -%% @param TestCase Name of the test case that is finished. -%% @param Config A list of key/value pairs, holding the test case configuration. -%% -%% @end -%%---------------------------------------------------------------------- --spec end_per_testcase(TestCase, Config) -> Result when - TestCase :: atom(), - Config :: [tuple()], - Result :: term(). - -end_per_testcase(_TestCase, _Config) -> - ok. - -%%---------------------------------------------------------------------- -%% @doc Returns the list of groups and test cases that are to be executed. -%% -%% @param GroupName Name of a test case group. -%% @param TestCase Name of a test case. -%% -%% @end -%%---------------------------------------------------------------------- --spec all() -> GroupsAndTestCases when - GroupsAndTestCases :: [Group | TestCase], - Group :: {group, GroupName}, - GroupName :: atom(), - TestCase :: atom(). - -all() -> - [ readme_example - % TODO: bel_scan tests - ]. 
- -%%%===================================================================== -%%% Test cases -%%%===================================================================== - -readme_example(Config) when is_list(Config) -> - [ {text, {{1,1}, undefined, undefined},<<"foo ">>} - , {param, {{1,5}, undefined, undefined}, <<"bar">>} - , {text, {{1,14}, undefined, undefined}, <<" baz">>} - ] = my_scan:string(<<"foo {{ bar }} baz">>), - ok. - -%%%===================================================================== -%%% Support functions -%%%===================================================================== - -% nothing here yet! diff --git a/test/support/my_scan.erl b/test/support/my_scan.erl deleted file mode 100644 index 198dfe5..0000000 --- a/test/support/my_scan.erl +++ /dev/null @@ -1,73 +0,0 @@ --module(my_scan). --behaviour(bel_scan). - -% API --export([ string/1 ]). - -% bel_scan callbacks --export([ init/1, handle_char/3, handle_tokens/2 ]). - --import(bel_scan, [ incr_col/1 - , incr_col/2 - , new_ln/1 - , continue/2 - , skip_new_lns/2 - , update_pos/1 - , token/2 - , push_token/2 - , snapshot/1 - , fold/2 - ]). - --record(state, {}). - -%%%===================================================================== -%%% API -%%%===================================================================== - -string(Text) -> - Scan = bel_scan:new(#{ - input => Text, - handler => ?MODULE - }), - bel_scan:string(#{}, Scan). - -%%%===================================================================== -%%% bel_scan callbacks -%%%===================================================================== - -init(Opts) when is_map(Opts) -> - {ok, #state{}}. 
- -handle_char(${, <<${, $\s, Rest/bitstring>>, Scan) -> - scan_param(Rest, fold(Scan, [ - fun(S) -> push_token(token(text, S), S) end, - fun(S) -> snapshot(S) end, - fun(S) -> incr_col(3, S) end, - fun(S) -> update_pos(S) end - ])); -handle_char(_Char, <<>>, Scan) -> - continue(<<>>, fold(Scan, [ - fun(S) -> incr_col(S) end, - fun(S) -> push_token(token(text, S), S) end - ])); -handle_char(_Char, <>, Scan) -> - continue(Rest, incr_col(Scan)). - -handle_tokens(Tokens, _Scan) -> - lists:reverse(Tokens). - -%%%===================================================================== -%%% Internal functions -%%%===================================================================== - -scan_param(<<$\s, $}, $}, Rest/bitstring>>, Scan) -> - continue(Rest, fold(Scan, [ - fun(S) -> push_token(token(param, S), S) end, - fun(S) -> incr_col(3, S) end, - fun(S) -> update_pos(S) end, - fun(S) -> snapshot(S) end - ])); -scan_param(<>, Scan0) -> - {ok, {_Char, Rest, Scan}} = skip_new_lns(Rest0, Scan0), - scan_param(Rest, incr_col(Scan)). From cd3d8e0d29ce38781b6fc0bd3cc09acf0d49c35b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Wed, 17 Apr 2024 18:29:09 -0300 Subject: [PATCH 02/25] feat: create location module --- src/bel_scan_loc.erl | 132 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 src/bel_scan_loc.erl diff --git a/src/bel_scan_loc.erl b/src/bel_scan_loc.erl new file mode 100644 index 0000000..d169f1a --- /dev/null +++ b/src/bel_scan_loc.erl @@ -0,0 +1,132 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc Location module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. 
+%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_loc). + +-export([ new/1 + , get_ln/1 + , set_ln/2 + , get_col/1 + , set_col/2 + , get_first_col/1 + , set_first_col/2 + , incr_ln/2 + , incr_col/2 + , new_ln/1 + , to_tuple/1 + ]). + +-export_type([ t/0 ]). + +-define(FIRST_LN, 1). +-define(FIRST_COL, 1). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-record(loc, { ln, col, first_col }). +-opaque t() :: #loc{}. + +%%%===================================================================== +%%% API +%%%===================================================================== + +new(Params) when is_map(Params) -> + FirstCol = maps:get(first_col, Params, ?FIRST_COL), + #loc{ + ln = maps:get(ln, Params, ?FIRST_LN), + col = maps:get(col, Params, FirstCol), + first_col = FirstCol + }. + +get_ln(#loc{ln = Ln}) -> + Ln. + +set_ln(Ln, #loc{} = Loc) -> + Loc#loc{ln = Ln}. + +get_col(#loc{col = Col}) -> + Col. + +set_col(Col, #loc{} = Loc) -> + Loc#loc{col = Col}. + +get_first_col(#loc{first_col = FirstCol}) -> + FirstCol. + +set_first_col(FirstCol, #loc{} = Loc) -> + Loc#loc{first_col = FirstCol}. + +incr_ln(N, #loc{ln = Ln} = Loc) -> + Loc#loc{ln = Ln+N}. + +incr_col(N, #loc{col = Col} = Loc) -> + Loc#loc{col = Col+N}. + +new_ln(#loc{ln = Ln, first_col = FirstCol} = Loc) -> + Loc#loc{ln = Ln+1, col = FirstCol}. + +to_tuple(#loc{ln = Ln, col = Col}) -> + {Ln, Col}. 
+ +%%%===================================================================== +%%% Tests +%%%===================================================================== + +-ifdef(TEST). +-compile([export_all, nowarn_export_all]). + +new_test() -> + [ { "Should have default values" + , ?assertEqual(#loc{ + ln = ?FIRST_LN, + col = ?FIRST_COL, + first_col = ?FIRST_COL + }, new(#{})) + } + , { "Should have params values" + , ?assertEqual(#loc{ + ln = 6, + col = 6, + first_col = 6 + }, new(#{ + ln => 6, + col => 6, + first_col => 6 + })) + } + ]. + +loc_test() -> + Loc = new(#{}), + [ { "Should increment one line" + , ?assert((incr_ln(1, Loc))#loc.ln =:= ?FIRST_LN+1)} + , { "Should increment one column" + , ?assert((incr_col(1, Loc))#loc.col =:= ?FIRST_COL+1)} + , { "Should add a new line" + , ?assert( + (new_ln(Loc))#loc.ln =:= ?FIRST_LN+1 + andalso (new_ln(Loc))#loc.col =:= ?FIRST_COL + )} + ]. + +-endif. From 4d584f4dc9de578b3b92c106a103cc2f8f3076cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Wed, 17 Apr 2024 21:17:28 -0300 Subject: [PATCH 03/25] feat: create binary part module --- src/bel_scan_bpart.erl | 129 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 src/bel_scan_bpart.erl diff --git a/src/bel_scan_bpart.erl b/src/bel_scan_bpart.erl new file mode 100644 index 0000000..d1559d4 --- /dev/null +++ b/src/bel_scan_bpart.erl @@ -0,0 +1,129 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc Binary part module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. 
+%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_bpart). + +-export([ new/1 + , get_bin/1 + , set_bin/2 + , get_pos/1 + , set_pos/2 + , get_len/1 + , set_len/2 + , incr_pos/2 + , incr_len/2 + , get_part/1 + ]). + +-export_type([ t/0 ]). + +-define(FIRST_POS, 0). +-define(INIT_LEN, 0). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-record(bpart, { bin, pos, len }). +-opaque t() :: #bpart{}. + +%%%===================================================================== +%%% API +%%%===================================================================== + +new(Params) when is_map(Params) -> + #bpart{ + bin = maps:get(bin, Params), + pos = maps:get(pos, Params, ?FIRST_POS), + len = maps:get(len, Params, ?INIT_LEN) + }. + +get_bin(#bpart{bin = X}) -> + X. + +set_bin(X, #bpart{} = BPart) -> + BPart#bpart{bin = X}. + +get_pos(#bpart{pos = X}) -> + X. + +set_pos(X, #bpart{} = BPart) -> + BPart#bpart{pos = X}. + +get_len(#bpart{len = X}) -> + X. + +set_len(X, #bpart{} = BPart) -> + BPart#bpart{len = X}. + +incr_pos(N, #bpart{pos = Pos} = BPart) -> + BPart#bpart{pos = Pos+N}. + +incr_len(N, #bpart{len = Len} = BPart) -> + BPart#bpart{len = Len+N}. + +get_part(#bpart{bin = Bin} = BPart) -> + binary:part(Bin, BPart#bpart.pos, BPart#bpart.len). + +%%%===================================================================== +%%% Tests +%%%===================================================================== + +-ifdef(TEST). +-compile([export_all, nowarn_export_all]). 
+ +new_test() -> + [ { "Should have default values" + , ?assertEqual(#bpart{ + bin = <<>>, + pos = ?FIRST_POS, + len = ?INIT_LEN + }, new(#{bin => <<>>})) + } + , { "Should have params values" + , ?assertEqual(#bpart{ + bin = <<>>, + pos = 6, + len = 6 + }, new(#{ + bin => <<>>, + pos => 6, + len => 6 + })) + } + ]. + +cursor_test() -> + BPart = new(#{bin => <<>>}), + [ { "Should increment one position" + , ?assert((incr_pos(1, BPart))#bpart.pos =:= ?FIRST_POS+1)} + , { "Should increment one length" + , ?assert((incr_len(1, BPart))#bpart.len =:= ?INIT_LEN+1)} + ]. + +get_part_test() -> + ?assertEqual(<<"bpart">>, get_part(new(#{ + bin => <<" bpart ">>, + pos => 2, + len => 5 + }))). + +-endif. From 3e4d5c34fe0f1ecdfc4d5e3c33f1b3877236ef47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Thu, 18 Apr 2024 12:01:03 -0300 Subject: [PATCH 04/25] feat: new scan implementation --- include/bel_scan_eng.hrl | 14 +++ rebar.config | 5 +- src/bel_scan.erl | 217 +++++++++++++++++++++++++++++++++++++++ src/bel_scan_anno.erl | 13 +++ src/bel_scan_eel_eng.erl | 67 ++++++++++++ src/bel_scan_eng.erl | 83 +++++++++++++++ src/bel_scan_mark.erl | 95 +++++++++++++++++ src/bel_scan_token.erl | 49 +++++++++ 8 files changed, 542 insertions(+), 1 deletion(-) create mode 100644 include/bel_scan_eng.hrl create mode 100644 src/bel_scan_anno.erl create mode 100644 src/bel_scan_eel_eng.erl create mode 100644 src/bel_scan_eng.erl create mode 100644 src/bel_scan_mark.erl create mode 100644 src/bel_scan_token.erl diff --git a/include/bel_scan_eng.hrl b/include/bel_scan_eng.hrl new file mode 100644 index 0000000..52ceed4 --- /dev/null +++ b/include/bel_scan_eng.hrl @@ -0,0 +1,14 @@ +%% This header exists to be possible to do pattern +%% matching in modules that behaves as bel_scan_eng. + +-record(marker, { + id :: bel_scan_mark:id(), + re_start :: bel_scan_mark:re_start(), + re_end :: bel_scan_mark:re_end() +}). 
+ +-record(engine, { + module :: module(), + markers :: [bel_scan_mark:t()], + state :: bel_scan_eng:state() +}). diff --git a/rebar.config b/rebar.config index 166afdf..b4ccd1f 100644 --- a/rebar.config +++ b/rebar.config @@ -1,4 +1,4 @@ -{erl_opts, [debug_info, warnings_as_errors]}. +{erl_opts, [debug_info]}. {deps, []}. @@ -18,6 +18,9 @@ ]}. {profiles, [ + {prod, [ + {erl_opts, [no_debug_info, warnings_as_errors]} + ]}, {test, [ {erl_opts, [{extra_src_dirs, ["test/support"]}]} ]} diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 7baf3ee..96ecdad 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -22,10 +22,227 @@ -module(bel_scan). -compile(inline_list_funcs). +-export([ new/1 + , bin/1 + , state/1 + , fold/2 + , token/3 + , text_token/2 + , push_token/2 + , push_tokens/2 + ]). + +-export_type([ t/0, input/0 ]). + +-import(bel_scan_loc, [ new_ln/1, incr_col/2 ]). +-import(bel_scan_bpart, [ incr_len/2, get_part/1 ]). + +-define(is_lf(X), ( + X =:= $\r orelse X =:= $\n orelse X =:= $\f +)). + +-define(DEFAULT_OPTS, #{}). + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -endif. +% TODO: rename loc to start_loc and add end_loc +-record(state, { engines, bpart, loc, tokens }). + +-opaque t() :: #state{}. +-type input() :: binary(). + +%%%===================================================================== +%%% API +%%%===================================================================== + +new(Params) when is_map(Params) -> + #state{ + engines = init_engines(maps:get(engines, Params, [bel_scan_eel_eng])), + bpart = maps:get(bpart, Params, bel_scan_bpart:new(#{ + bin => maps:get(bin, Params) + })), + loc = maps:get(loc, Params, bel_scan_loc:new(#{})), + tokens = maps:get(tokens, Params, []) + }. + +bin(Bin) -> + state(new(#{bin => Bin})). + +state(#state{bpart = BPart} = State) -> + continue(scan, bel_scan_bpart:get_bin(BPart), State). + +fold(#state{} = State, Funs) -> + lists:foldl(fun(F, S) -> F(S) end, State, Funs). 
+ +text_token(Text, State) -> + token(text, Text, State). + +token(Id, Value, State) -> + {Id, anno(undefined, State), Value}. + +anno(Metadata, _State) -> + {start_loc, end_loc, Metadata}. + +push_token(Token, #state{tokens = Tokens} = State) -> + State#state{tokens = [Token | Tokens]}. + +push_tokens(Tokens, State) when is_list(Tokens) -> + lists:foldl(fun push_token/2, State, Tokens). + +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +init_engines(Modules) -> + [init_engine(Mod) || Mod <- Modules]. + +init_engine(Mod) when is_atom(Mod) -> + init_engine({Mod, ?DEFAULT_OPTS}); +init_engine({Mod, Opts}) when is_atom(Mod) -> + {Mod, bel_scan_eng:compile(Mod:init(Opts))}. + +continue(scan, <<>>, State) -> + terminate(State); +continue(scan, <>, State) -> + case scan(Rest0) of + {{new_ln, Incr}, Rest} -> + continue(scan, Rest, fold(State, [ + fun(S) -> S#state{loc = new_ln(S#state.loc)} end, + fun(S) -> S#state{bpart = incr_len(Incr, S#state.bpart)} end + ])); + {{continue, Incr}, Rest} -> + continue(find_start_markers, Rest, fold(State, [ + fun(S) -> S#state{loc = incr_col(Incr, S#state.loc)} end, + fun(S) -> S#state{bpart = incr_len(Incr, S#state.bpart)} end + ])); + terminate -> + terminate(State) + end; +continue(find_start_markers, <>, State) -> + case find_start_markers(State#state.engines, Rest, []) of + [] -> + continue(scan, Rest, State); + StartMarkers -> + continue({find_end_markers, StartMarkers}, Rest, State) + end; +continue({find_end_markers, StartMarkers}, <>, State) -> + case find_end_markers(StartMarkers, []) of + [{Mod, [{Match, Rest}]}] -> + continue({match, {Mod, Match}}, Rest, State); + [{Mod, [_|_] = EndMarkers}] -> + error({markers_conflict, {Mod, EndMarkers}}, [ + {find_end_markers, StartMarkers}, Rest0, State + ]); + [_|_] = Engs -> + error({engines_markers_conflict, Engs}, [ + {find_end_markers, StartMarkers}, Rest0, 
State + ]); + [] -> + error(miss_end_marker, [ + {find_end_markers, StartMarkers}, Rest0, State + ]) + end; +continue({match, Match}, Rest, State0) -> + + io:format("[MATCH] ~p~n", [get_part(State0#state.bpart)]), + + State = resolve_match(State0#state.engines, Match, State0), + continue(scan, Rest, State). + +% TODO: Check text +% TODO: handle_tokens. +terminate(State) -> + + io:format("[TERMINATE] ~p~n", [get_part(State#state.bpart)]), + + State. + +scan(<<$\r, $\n, Rest/binary>>) -> + {{new_ln, 2}, Rest}; +scan(<>) when ?is_lf(Char) -> + {{new_ln, 1}, Rest}; +scan(<<_, Rest/binary>>) -> + {{continue, 1}, Rest}; +scan(<<>>) -> + terminate. + +find_start_markers([{Mod, Eng} | Engs], Bin, Acc) -> + Markers = bel_scan_eng:get_markers(Eng), + case do_find_start_markers(Markers, Bin, []) of + [] -> + find_start_markers(Engs, Bin, Acc); + StartMarkers -> + find_start_markers(Engs, Bin, [{Mod, Eng, StartMarkers} | Acc]) + end; +find_start_markers([], _, Acc) -> + lists:reverse(Acc). + +do_find_start_markers([Marker | Markers], Bin, Acc) -> + case bel_scan_mark:re_start_match(Marker, Bin) of + {match, {Text, Groups, Rest}} -> + do_find_start_markers(Markers, Bin, [{Marker, Text, Groups, Rest} | Acc]); + nomatch -> + do_find_start_markers(Markers, Bin, Acc) + end; +do_find_start_markers([], _, Acc) -> + lists:reverse(Acc). + +find_end_markers([{Mod, _Eng, StartMarkers} | Engs], Acc) -> + case do_find_end_markers(StartMarkers, []) of + [] -> + find_end_markers(Engs, Acc); + MatchMarkers -> + find_end_markers(Engs, [{Mod, MatchMarkers} | Acc]) + end; +find_end_markers([], Acc) -> + lists:reverse(Acc). + +do_find_end_markers([{Marker, Bin, StartGroups, Rest} | Markers], Acc) -> + case end_marker_match(Bin, Marker) of + {true, {Text, EndGroups, <<>>}} -> + Captured = {StartGroups, EndGroups}, + MarkerId = bel_scan_mark:get_id(Marker), + Match = {MarkerId, Text, Captured}, + do_find_end_markers(Markers, [{Match, Rest} | Acc]); + % TODO: Check here. Not tested. 
+ {true, nomarker} -> + Captured = {StartGroups, []}, + MarkerId = bel_scan_mark:get_id(Marker), + Match = {MarkerId, Bin, Captured}, + do_find_end_markers(Markers, [{Match, Rest} | Acc]); + false -> + do_find_end_markers(Markers, Acc) + end; +do_find_end_markers([], Acc) -> + lists:reverse(Acc). + +end_marker_match(<<>>, _Marker) -> + false; +end_marker_match(Bin, Marker) -> + case bel_scan_mark:re_end_match(Marker, Bin) of + {match, {Text, Groups, Rest}} -> + {true, {Text, Groups, Rest}}; + nomatch -> + <<_, Rest/binary>> = Bin, + end_marker_match(Rest, Marker); + nomarker -> + {true, nomarker} + end. + +resolve_match([{Mod, _Eng} | Engs], Match, State0) -> + case Mod:handle_match(Match, State0) of + {noreply, State} -> + resolve_match(Engs, Match, State); + {reply, Tokens, State} -> + resolve_match(Engs, Match, push_tokens(Tokens, State)); + {halt, State} -> + State + end; +resolve_match([], _Match, State) -> + State. + %%%===================================================================== %%% Tests %%%===================================================================== diff --git a/src/bel_scan_anno.erl b/src/bel_scan_anno.erl new file mode 100644 index 0000000..66e6c89 --- /dev/null +++ b/src/bel_scan_anno.erl @@ -0,0 +1,13 @@ +-module(bel_scan_anno). + +-export_type([ t/0, loc/0, text/0, meta/0 ]). + +-record(anno, { + loc :: loc(), + text :: binary(), + meta :: term() +}). +-opaque t() :: #anno{}. +-type loc() :: bel_loc:t(). +-type text() :: binary(). +-type meta() :: term(). diff --git a/src/bel_scan_eel_eng.erl b/src/bel_scan_eel_eng.erl new file mode 100644 index 0000000..079906d --- /dev/null +++ b/src/bel_scan_eel_eng.erl @@ -0,0 +1,67 @@ +-module(bel_scan_eel_eng). +-behaviour(bel_scan_eng). + +% bel_scan_eng callbacks +-export([ init/1 + , handle_start/1 + , handle_text/2 + , handle_match/2 + , handle_terminate/1 + ]). + +-include("bel_scan_eng.hrl"). 
+ +init(_Opts) -> + #engine{ + markers = [ + #marker{ + id = inline, + re_start = <<"<%=\\s+">>, + re_end = <<"\\s+.%>">> + }, + #marker{ + id = start, + re_start = <<"<%=\\s+">>, + re_end = <<"\\s+%>">> + }, + #marker{ + id = continue, + re_start = <<"<%\\s+">>, + re_end = <<"\\s+%>">> + }, + #marker{ + id = terminate, + re_start = <<"<%\\s+">>, + re_end = <<"\\s+.%>">> + }, + #marker{ + id = comment, + re_start = <<"<%!--\\s+">>, + re_end = <<"\\s+--%>">> + } + ] + }. + +handle_start(State) -> + {noreply, State}. + +handle_text(_Text, State) -> + {noreply, State}. + +handle_match({?MODULE, {inline, Text, _Captured}}, State) -> + Token = bel_scan:token(inline, Text, State), + {reply, [Token], State}; +handle_match({?MODULE, {start, _Text, _Captured}}, State) -> + {noreply, State}; +handle_match({?MODULE, {continue, _Text, _Captured}}, State) -> + {noreply, State}; +handle_match({?MODULE, {terminate, _Text, _Captured}}, State) -> + {noreply, State}; +handle_match({?MODULE, {comment, Text, _Captured}}, State) -> + Token = bel_scan:token(comment, Text, State), + {reply, [Token], State}; +handle_match(_Match, State) -> + {noreply, State}. + +handle_terminate(State) -> + {noreply, State}. diff --git a/src/bel_scan_eng.erl b/src/bel_scan_eng.erl new file mode 100644 index 0000000..e7f3d9a --- /dev/null +++ b/src/bel_scan_eng.erl @@ -0,0 +1,83 @@ +-module(bel_scan_eng). + +-export([ compile/1 + , fold/2 + , get_module/1 + , set_module/2 + , get_markers/1 + , set_markers/2 + , get_state/1 + , set_state/2 + ]). + +% Callbacks + +-callback init(Opts) -> EngState + when Opts :: opts() + , EngState :: state() + . + +-callback handle_start(Scan0) -> Scan + when Scan0 :: scan() + , Scan :: scan() + . + +-callback handle_text(Text, Scan) -> Return + when Text :: binary() + , Scan :: scan() + , Return :: {noreply, scan()} + | {reply, binary(), scan()} + | {halt, scan()} + . 
+ +-callback handle_match(Match, Scan) -> Return + when Match :: {MarkerMod, {MarkerId, Text, Captured}} + , MarkerMod :: module() + , MarkerId :: marker_id() + , Text :: binary() + , Captured :: captured() + , Scan :: scan() + , Return :: {noreply, scan()} + | {reply, binary(), scan()} + | {halt, scan()} + . + +-callback handle_terminate(Scan0) -> Scan + when Scan0 :: scan() + , Scan :: scan() + . + +% Libs + +-include("bel_scan_eng.hrl"). + +-type scan() :: bel_scan:t(). +-type marker_id() :: bel_scan_marker:id(). +-type opts() :: term(). +-type state() :: term(). +-type re_group() :: binary(). +-type captured() :: { Start :: [re_group()], End :: [re_group()] }. + +compile(#engine{markers = Markers} = Eng) -> + Eng#engine{markers = [bel_scan_mark:compile(M) || M <- Markers]}. + +fold(#engine{} = Eng, Funs) when is_list(Funs) -> + lists:foldl(fun(F, E) -> F(E) end, Eng, Funs). + +get_module(#engine{module = Module}) -> + Module. + +set_module(Module, #engine{} = Eng) when is_atom(Module) -> + Eng#engine{module = Module}. + +get_markers(#engine{markers = Markers}) -> + Markers. + +set_markers(Markers, #engine{} = Eng) -> + Eng#engine{markers = Markers}. + +get_state(#engine{state = State}) -> + State. + +set_state(State, #engine{} = Eng) -> + Eng#engine{state = State}. diff --git a/src/bel_scan_mark.erl b/src/bel_scan_mark.erl new file mode 100644 index 0000000..866d82c --- /dev/null +++ b/src/bel_scan_mark.erl @@ -0,0 +1,95 @@ +-module(bel_scan_mark). + +-export([ compile/1 + , re_start_match/2 + , re_end_match/2 + , get_id/1 + , set_id/2 + , get_re_start/1 + , set_re_start/2 + , get_re_end/1 + , set_re_end/2 + ]). + +-export_type([ t/0, id/0, re_start/0, re_end/0 ]). + +-define(is_re_pattern(X), ( + is_tuple(X) + andalso tuple_size(X) =:= 5 + andalso element(1, X) =:= re_pattern +)). + +-include("bel_scan_eng.hrl"). + +-opaque t() :: #marker{}. +-type id() :: term(). +-type re() :: binary() | re:mp(). +-type re_start() :: re(). 
+-type re_end() :: re() | undefined. + +compile(#marker{} = Marker) -> + case compile_re_start(Marker#marker.re_start) of + {ok, REStart} -> + case compile_re_end(Marker#marker.re_end) of + {ok, REEnd} -> + Marker#marker{ + re_start = REStart, + re_end = REEnd + }; + {error, Reason} -> + error({re_end, Reason}, [Marker]) + end; + {error, Reason} -> + error({re_start, Reason}, [Marker]) + end. + +compile_re_start(RE) when is_binary(RE) -> + re:compile(RE, [anchored]); +compile_re_start(Pattern) when ?is_re_pattern(Pattern) -> + {ok, Pattern}. + +compile_re_end(RE) when is_binary(RE) -> + re:compile(RE); +compile_re_end(Pattern) when ?is_re_pattern(Pattern) -> + {ok, Pattern}; +compile_re_end(undefined) -> + {ok, undefined}. + +re_start_match(#marker{re_start = RE}, Bin) -> + re_match(Bin, RE, []). + +re_end_match(#marker{re_end = undefined}, _Bin) -> + nomarker; +re_end_match(#marker{re_end = RE}, Bin) -> + re_match(Bin, RE, [anchored]). + +re_match(Bin, RE, Opts) -> + case re:run(Bin, RE, [{capture, all, binary}]) of + {match, [MatchText | Groups]} -> + Len = byte_size(Bin) - byte_size(MatchText), + MarkerText = binary:part(Bin, 0, Len), + Text = re:replace(MarkerText, RE, <<>>, [{return, binary} | Opts]), + Rest = binary:part(Bin, Len, Len - byte_size(Text)), + % TODO: Return a byte_size to adjust the position. + {match, {Text, Groups, Rest}}; + nomatch -> + nomatch + end. + +get_id(#marker{id = Id}) -> + Id. + +set_id(Id, #marker{} = Marker) -> + Marker#marker{id = Id}. + +get_re_start(#marker{re_start = REStart}) -> + REStart. + +set_re_start(REStart, #marker{} = Marker) -> + Marker#marker{re_start = REStart}. + +get_re_end(#marker{re_end = REEnd}) -> + REEnd. + +set_re_end(REEnd, #marker{} = Marker) -> + Marker#marker{re_end = REEnd}. diff --git a/src/bel_scan_token.erl b/src/bel_scan_token.erl new file mode 100644 index 0000000..1102ba8 --- /dev/null +++ b/src/bel_scan_token.erl @@ -0,0 +1,49 @@ +% TODO: Delete. +-module(bel_scan_token). 
+ +-export([ new/1 + , get_id/1 + , set_id/2 + , get_anno/1 + , set_anno/2 + , get_value/1 + , set_value/2 + ]). + +-export_type([ t/0, id/0, anno/0, value/0 ]). + +-record(token, { + id :: id(), + anno :: anno(), + value :: term() +}). + +-opaque t() :: #token{}. +-type id() :: term(). +-type anno() :: bel_scan:t(). +-type value() :: term(). + +new(Params) when is_map(Params) -> + #token{ + id = maps:get(id, Params), + anno = maps:get(anno, Params), + value = maps:get(value, Params) + }. + +get_id(#token{id = Id}) -> + Id. + +set_id(Id, #token{} = Token) -> + Token#token{id = Id}. + +get_anno(#token{anno = Anno}) -> + Anno. + +set_anno(Anno, #token{} = Token) -> + Token#token{anno = Anno}. + +get_value(#token{value = Value}) -> + Value. + +set_value(Value, #token{} = Token) -> + Token#token{value = Value}. From a854d6c5343a0e2d976f7108ed3906750bc1586c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Fri, 19 Apr 2024 01:39:49 -0300 Subject: [PATCH 05/25] feat: simplify using just one regex --- include/bel_scan_eng.hrl | 5 +- src/bel_scan.erl | 162 ++++++++++++++------------------------- src/bel_scan_eel_eng.erl | 46 +++++------ src/bel_scan_eng.erl | 8 +- src/bel_scan_loc.erl | 19 +++++ src/bel_scan_mark.erl | 80 +++++-------------- src/bel_scan_read.erl | 16 ++++ 7 files changed, 142 insertions(+), 194 deletions(-) create mode 100644 src/bel_scan_read.erl diff --git a/include/bel_scan_eng.hrl b/include/bel_scan_eng.hrl index 52ceed4..857d19b 100644 --- a/include/bel_scan_eng.hrl +++ b/include/bel_scan_eng.hrl @@ -2,9 +2,8 @@ %% matching in modules that behaves as bel_scan_eng. -record(marker, { - id :: bel_scan_mark:id(), - re_start :: bel_scan_mark:re_start(), - re_end :: bel_scan_mark:re_end() + id :: bel_scan_mark:id(), + re :: bel_scan_mark:re() }). 
-record(engine, { diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 96ecdad..f39c340 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -37,10 +37,6 @@ -import(bel_scan_loc, [ new_ln/1, incr_col/2 ]). -import(bel_scan_bpart, [ incr_len/2, get_part/1 ]). --define(is_lf(X), ( - X =:= $\r orelse X =:= $\n orelse X =:= $\f -)). - -define(DEFAULT_OPTS, #{}). -ifdef(TEST). @@ -82,9 +78,14 @@ text_token(Text, State) -> token(Id, Value, State) -> {Id, anno(undefined, State), Value}. +% TODO anno(Metadata, _State) -> {start_loc, end_loc, Metadata}. +% TODO +clear_text(State) -> + State. + push_token(Token, #state{tokens = Tokens} = State) -> State#state{tokens = [Token | Tokens]}. @@ -106,7 +107,7 @@ init_engine({Mod, Opts}) when is_atom(Mod) -> continue(scan, <<>>, State) -> terminate(State); continue(scan, <>, State) -> - case scan(Rest0) of + case bel_scan_read:bin(Rest0) of {{new_ln, Incr}, Rest} -> continue(scan, Rest, fold(State, [ fun(S) -> S#state{loc = new_ln(S#state.loc)} end, @@ -120,127 +121,76 @@ continue(scan, <>, State) -> terminate -> terminate(State) end; -continue(find_start_markers, <>, State) -> - case find_start_markers(State#state.engines, Rest, []) of - [] -> - continue(scan, Rest, State); - StartMarkers -> - continue({find_end_markers, StartMarkers}, Rest, State) - end; -continue({find_end_markers, StartMarkers}, <>, State) -> - case find_end_markers(StartMarkers, []) of - [{Mod, [{Match, Rest}]}] -> - continue({match, {Mod, Match}}, Rest, State); - [{Mod, [_|_] = EndMarkers}] -> - error({markers_conflict, {Mod, EndMarkers}}, [ - {find_end_markers, StartMarkers}, Rest0, State - ]); - [_|_] = Engs -> - error({engines_markers_conflict, Engs}, [ - {find_end_markers, StartMarkers}, Rest0, State - ]); - [] -> - error(miss_end_marker, [ - {find_end_markers, StartMarkers}, Rest0, State - ]) +continue(find_start_markers, <>, State) -> + case find_marker(State#state.engines, Rest0) of + {match, {Match, Rest}} -> + continue({match, Match}, Rest, 
State); + nomatch -> + continue(scan, Rest0, State) end; continue({match, Match}, Rest, State0) -> - - io:format("[MATCH] ~p~n", [get_part(State0#state.bpart)]), - - State = resolve_match(State0#state.engines, Match, State0), + State1 = case get_part(State0#state.bpart) of + <<>> -> + State0; + Text -> + handle_text(State0#state.engines, Text, State0) + end, + State = handle_match(State0#state.engines, Match, State1), continue(scan, Rest, State). -% TODO: Check text -% TODO: handle_tokens. terminate(State) -> + case get_part(State#state.bpart) of + <<>> -> + State; + Text -> + handle_text(State#state.engines, Text, State) + end. - io:format("[TERMINATE] ~p~n", [get_part(State#state.bpart)]), - - State. - -scan(<<$\r, $\n, Rest/binary>>) -> - {{new_ln, 2}, Rest}; -scan(<>) when ?is_lf(Char) -> - {{new_ln, 1}, Rest}; -scan(<<_, Rest/binary>>) -> - {{continue, 1}, Rest}; -scan(<<>>) -> - terminate. - -find_start_markers([{Mod, Eng} | Engs], Bin, Acc) -> +find_marker([{Mod, Eng} | Engs], Bin) -> Markers = bel_scan_eng:get_markers(Eng), - case do_find_start_markers(Markers, Bin, []) of - [] -> - find_start_markers(Engs, Bin, Acc); - StartMarkers -> - find_start_markers(Engs, Bin, [{Mod, Eng, StartMarkers} | Acc]) + case do_find_marker(Markers, Bin) of + {match, {Marker, Groups, Rest}} -> + MarkerId = bel_scan_mark:get_id(Marker), + Match = {Mod, MarkerId, Groups}, + {match, {Match, Rest}}; + nomatch -> + find_marker(Engs, Bin) end; -find_start_markers([], _, Acc) -> - lists:reverse(Acc). +find_marker([], _) -> + nomatch. 
-do_find_start_markers([Marker | Markers], Bin, Acc) -> - case bel_scan_mark:re_start_match(Marker, Bin) of - {match, {Text, Groups, Rest}} -> - do_find_start_markers(Markers, Bin, [{Marker, Text, Groups, Rest} | Acc]); +do_find_marker([Marker | Markers], Bin) -> + case bel_scan_mark:re_match(Marker, Bin) of + {match, {Groups, Rest}} -> + {match, {Marker, Groups, Rest}}; nomatch -> - do_find_start_markers(Markers, Bin, Acc) + do_find_marker(Markers, Bin) end; -do_find_start_markers([], _, Acc) -> - lists:reverse(Acc). - -find_end_markers([{Mod, _Eng, StartMarkers} | Engs], Acc) -> - case do_find_end_markers(StartMarkers, []) of - [] -> - find_end_markers(Engs, Acc); - MatchMarkers -> - find_end_markers(Engs, [{Mod, MatchMarkers} | Acc]) - end; -find_end_markers([], Acc) -> - lists:reverse(Acc). +do_find_marker([], _) -> + nomatch. -do_find_end_markers([{Marker, Bin, StartGroups, Rest} | Markers], Acc) -> - case end_marker_match(Bin, Marker) of - {true, {Text, EndGroups, <<>>}} -> - Captured = {StartGroups, EndGroups}, - MarkerId = bel_scan_mark:get_id(Marker), - Match = {MarkerId, Text, Captured}, - do_find_end_markers(Markers, [{Match, Rest} | Acc]); - % TODO: Check here. Not tested. - {true, nomarker} -> - Captured = {StartGroups, []}, - MarkerId = bel_scan_mark:get_id(Marker), - Match = {MarkerId, Bin, Captured}, - do_find_end_markers(Markers, [{Match, Rest} | Acc]); - false -> - do_find_end_markers(Markers, Acc) +handle_text([{Mod, _Eng} | Engs], Text0, State0) -> + case Mod:handle_text(Text0, State0) of + {noreply, State} -> + handle_text(Engs, Text0, State); + {reply, Text, State} -> + handle_text(Engs, Text, State); + {halt, State} -> + State end; -do_find_end_markers([], Acc) -> - lists:reverse(Acc). 
- -end_marker_match(<<>>, _Marker) -> - false; -end_marker_match(Bin, Marker) -> - case bel_scan_mark:re_end_match(Marker, Bin) of - {match, {Text, Groups, Rest}} -> - {true, {Text, Groups, Rest}}; - nomatch -> - <<_, Rest/binary>> = Bin, - end_marker_match(Rest, Marker); - nomarker -> - {true, nomarker} - end. +handle_text([], Text, State) -> + push_token(text_token(Text, State), State). -resolve_match([{Mod, _Eng} | Engs], Match, State0) -> +handle_match([{Mod, _Eng} | Engs], Match, State0) -> case Mod:handle_match(Match, State0) of {noreply, State} -> - resolve_match(Engs, Match, State); + handle_match(Engs, Match, State); {reply, Tokens, State} -> - resolve_match(Engs, Match, push_tokens(Tokens, State)); + handle_match(Engs, Match, push_tokens(Tokens, State)); {halt, State} -> State end; -resolve_match([], _Match, State) -> +handle_match([], _Match, State) -> State. %%%===================================================================== diff --git a/src/bel_scan_eel_eng.erl b/src/bel_scan_eel_eng.erl index 079906d..8f2dade 100644 --- a/src/bel_scan_eel_eng.erl +++ b/src/bel_scan_eel_eng.erl @@ -16,28 +16,38 @@ init(_Opts) -> markers = [ #marker{ id = inline, - re_start = <<"<%=\\s+">>, - re_end = <<"\\s+.%>">> + re = << + "(<%=\\s+)(.*)?(\\s+\\.%>)" "|" + "(<%=\\s+)(.*)?(^(\\s*\\.%>))" + >> }, #marker{ id = start, - re_start = <<"<%=\\s+">>, - re_end = <<"\\s+%>">> - }, + re = << + "(<%=\\s+)(.*)?(\\s+%>)" "|" + "(<%=\\s+)(.*)?(^(\\s*%>))" + >> + } #marker{ id = continue, - re_start = <<"<%\\s+">>, - re_end = <<"\\s+%>">> + re = << + "(<%\\s+)(.*)?(\\s+%>)" "|" + "(<%\\s+)(.*)?(^(\\s*%>))" + >> }, #marker{ id = terminate, - re_start = <<"<%\\s+">>, - re_end = <<"\\s+.%>">> + re = << + "(<%\\s+)(.*)?(\\s+\\.%>)" "|" + "(<%\\s+)(.*)?(^(\\s*\\.%>))" + >> }, #marker{ id = comment, - re_start = <<"<%!--\\s+">>, - re_end = <<"\\s+--%>">> + re = << + "(<%!--\\s+)(.*)?(\\s+--%>)" "|" + "(<%!--\\s+)(.*)?(^(\\s*--%>))" + >> } ] }. 
@@ -48,17 +58,9 @@ handle_start(State) -> handle_text(_Text, State) -> {noreply, State}. -handle_match({?MODULE, {inline, Text, _Captured}}, State) -> - Token = bel_scan:token(inline, Text, State), - {reply, [Token], State}; -handle_match({?MODULE, {start, _Text, _Captured}}, State) -> - {noreply, State}; -handle_match({?MODULE, {continue, _Text, _Captured}}, State) -> - {noreply, State}; -handle_match({?MODULE, {terminate, _Text, _Captured}}, State) -> - {noreply, State}; -handle_match({?MODULE, {comment, Text, _Captured}}, State) -> - Token = bel_scan:token(comment, Text, State), +handle_match({?MODULE, MarkerId, Captured}, State) -> + [_StartMarker, Expr, _EndMarker] = Captured, + Token = bel_scan:token(MarkerId, Expr, State), {reply, [Token], State}; handle_match(_Match, State) -> {noreply, State}. diff --git a/src/bel_scan_eng.erl b/src/bel_scan_eng.erl index e7f3d9a..9c8ecb6 100644 --- a/src/bel_scan_eng.erl +++ b/src/bel_scan_eng.erl @@ -31,14 +31,13 @@ . -callback handle_match(Match, Scan) -> Return - when Match :: {MarkerMod, {MarkerId, Text, Captured}} + when Match :: {MarkerMod, {MarkerId, Captured}} , MarkerMod :: module() , MarkerId :: marker_id() - , Text :: binary() , Captured :: captured() , Scan :: scan() , Return :: {noreply, scan()} - | {reply, binary(), scan()} + | {reply, [token()], scan()} | {halt, scan()} . @@ -53,10 +52,11 @@ -type scan() :: bel_scan:t(). -type marker_id() :: bel_scan_marker:id(). +-type token() :: bel_scan_token:t(). -type opts() :: term(). -type state() :: term(). -type re_group() :: binary(). --type captured() :: { Start :: [re_group()], End :: [re_group()] }. +-type captured() :: [re_group()]. compile(#engine{markers = Markers} = Eng) -> Eng#engine{markers = [bel_scan_mark:compile(M) || M <- Markers]}. diff --git a/src/bel_scan_loc.erl b/src/bel_scan_loc.erl index d169f1a..5eb4450 100644 --- a/src/bel_scan_loc.erl +++ b/src/bel_scan_loc.erl @@ -22,6 +22,8 @@ -module(bel_scan_loc). 
-export([ new/1 + , read/1 + , read/2 , get_ln/1 , set_ln/2 , get_col/1 @@ -44,6 +46,7 @@ -endif. -record(loc, { ln, col, first_col }). + -opaque t() :: #loc{}. %%%===================================================================== @@ -58,6 +61,22 @@ new(Params) when is_map(Params) -> first_col = FirstCol }. +read(Bin) -> + read(Bin, new(#{})). + +read(Bin, #loc{} = Loc) when is_binary(Bin) -> + do_read(Bin, Loc). + +do_read(Bin, Loc) -> + case bel_scan_read:bin(Bin) of + {{new_ln, Incr}, Rest} -> + do_read(Rest, incr_ln(Incr, Loc)); + {{continue, Incr}, Rest} -> + do_read(Rest, incr_col(Incr, Loc)); + terminate -> + Loc + end. + get_ln(#loc{ln = Ln}) -> Ln. diff --git a/src/bel_scan_mark.erl b/src/bel_scan_mark.erl index 866d82c..c5be846 100644 --- a/src/bel_scan_mark.erl +++ b/src/bel_scan_mark.erl @@ -1,17 +1,14 @@ -module(bel_scan_mark). -export([ compile/1 - , re_start_match/2 - , re_end_match/2 + , re_match/2 , get_id/1 , set_id/2 - , get_re_start/1 - , set_re_start/2 - , get_re_end/1 - , set_re_end/2 + , get_re/1 + , set_re/2 ]). --export_type([ t/0, id/0, re_start/0, re_end/0 ]). +-export_type([ t/0, id/0, re/0 ]). -define(is_re_pattern(X), ( is_tuple(X) @@ -21,57 +18,28 @@ -include("bel_scan_eng.hrl"). --opaque t() :: #marker{}. --type id() :: term(). --type re() :: binary() | re:mp(). --type re_start() :: re(). --type re_end() :: re() | undefined. +-opaque t() :: #marker{}. +-type id() :: term(). +-type re() :: binary() | re:mp(). compile(#marker{} = Marker) -> - case compile_re_start(Marker#marker.re_start) of - {ok, REStart} -> - case compile_re_end(Marker#marker.re_end) of - {ok, REEnd} -> - Marker#marker{ - re_start = REStart, - re_end = REEnd - }; - {error, Reason} -> - error({re_end, Reason}, [Marker]) - end; + case compile_re(Marker#marker.re) of + {ok, RE} -> + Marker#marker{re = RE}; {error, Reason} -> - error({re_start, Reason}, [Marker]) + error({re, Reason}, [Marker]) end. 
-compile_re_start(RE) when is_binary(RE) -> - re:compile(RE, [anchored]); -compile_re_start(Pattern) when ?is_re_pattern(Pattern) -> +compile_re(RE) when is_binary(RE) -> + re:compile(RE, [anchored, multiline, ucp, {newline, anycrlf}]); +compile_re(Pattern) when ?is_re_pattern(Pattern) -> {ok, Pattern}. -compile_re_end(RE) when is_binary(RE) -> - re:compile(RE); -compile_re_end(Pattern) when ?is_re_pattern(Pattern) -> - {ok, Pattern}; -compile_re_end(undefined) -> - {ok, undefined}. - -re_start_match(#marker{re_start = RE}, Bin) -> - re_match(Bin, RE, []). - -re_end_match(#marker{re_end = undefined}, _Bin) -> - nomarker; -re_end_match(#marker{re_end = RE}, Bin) -> - re_match(Bin, RE, [anchored]). - -re_match(Bin, RE, Opts) -> +re_match(#marker{re = RE}, Bin) -> case re:run(Bin, RE, [{capture, all, binary}]) of {match, [MatchText | Groups]} -> - Len = byte_size(Bin) - byte_size(MatchText), - MarkerText = binary:part(Bin, 0, Len), - Text = re:replace(MarkerText, RE, <<>>, [{return, binary} | Opts]), - Rest = binary:part(Bin, Len, Len - byte_size(Text)), - % TODO: Return a byte_size to adjust the position. - {match, {Text, Groups, Rest}}; + <<_:(byte_size(MatchText))/binary, Rest/binary>> = Bin, + {match, {Groups, Rest}}; nomatch -> nomatch end. @@ -82,14 +50,8 @@ get_id(#marker{id = Id}) -> set_id(Id, #marker{} = Marker) -> Marker#marker{id = Id}. -get_re_start(#marker{re_start = REStart}) -> - REStart. - -set_re_start(REStart, #marker{} = Marker) -> - Marker#marker{re_start = REStart}. - -get_re_end(#marker{re_end = REEnd}) -> - REEnd. +get_re(#marker{re = RE}) -> + RE. -set_re_end(REEnd, #marker{} = Marker) -> - Marker#marker{re_end = REEnd}. +set_re(RE, #marker{} = Marker) -> + Marker#marker{re = RE}. diff --git a/src/bel_scan_read.erl b/src/bel_scan_read.erl new file mode 100644 index 0000000..346d9ba --- /dev/null +++ b/src/bel_scan_read.erl @@ -0,0 +1,16 @@ +-module(bel_scan_read). + +-export([ bin/1 ]). 
+ +-define(is_lf(X), ( + X =:= $\r orelse X =:= $\n orelse X =:= $\f +)). + +bin(<<$\r, $\n, Rest/binary>>) -> + {{new_ln, 2}, Rest}; +bin(<>) when ?is_lf(Char) -> + {{new_ln, 1}, Rest}; +bin(<<_, Rest/binary>>) -> + {{continue, 1}, Rest}; +bin(<<>>) -> + terminate. From 9fde0a5024173a8c19bf4126ef9f7b7c0bbc5f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Fri, 19 Apr 2024 01:58:24 -0300 Subject: [PATCH 06/25] feat: implement handle_start callback --- src/bel_scan.erl | 23 ++++++++++++++++++++--- src/bel_scan_eel_eng.erl | 4 ++-- src/bel_scan_eng.erl | 11 +++++++---- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/bel_scan.erl b/src/bel_scan.erl index f39c340..09d41cf 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -57,17 +57,17 @@ new(Params) when is_map(Params) -> #state{ engines = init_engines(maps:get(engines, Params, [bel_scan_eel_eng])), bpart = maps:get(bpart, Params, bel_scan_bpart:new(#{ - bin => maps:get(bin, Params) + bin => maps:get(bin, Params, <<>>) })), loc = maps:get(loc, Params, bel_scan_loc:new(#{})), tokens = maps:get(tokens, Params, []) }. bin(Bin) -> - state(new(#{bin => Bin})). + start(Bin, new(#{})). state(#state{bpart = BPart} = State) -> - continue(scan, bel_scan_bpart:get_bin(BPart), State). + start(bel_scan_bpart:get_bin(BPart), State). fold(#state{} = State, Funs) -> lists:foldl(fun(F, S) -> F(S) end, State, Funs). @@ -104,6 +104,11 @@ init_engine(Mod) when is_atom(Mod) -> init_engine({Mod, Opts}) when is_atom(Mod) -> {Mod, bel_scan_eng:compile(Mod:init(Opts))}. +start(Bin0, State0) -> + {Bin, State} = handle_start(State0#state.engines, Bin0, State0), + BPart = bel_scan_bpart:set_bin(Bin, State#state.bpart), + continue(scan, Bin, State#state{bpart = BPart}). + continue(scan, <<>>, State) -> terminate(State); continue(scan, <>, State) -> @@ -169,6 +174,18 @@ do_find_marker([Marker | Markers], Bin) -> do_find_marker([], _) -> nomatch. 
+handle_start([{Mod, _Eng} | Engs], Bin0, State0) -> + case Mod:handle_start(Bin0, State0) of + {noreply, State} -> + handle_start(Engs, Bin0, State); + {reply, Bin, State} -> + handle_start(Engs, Bin, State); + {halt, State} -> + State + end; +handle_start([], Bin, State) -> + {Bin, State}. + handle_text([{Mod, _Eng} | Engs], Text0, State0) -> case Mod:handle_text(Text0, State0) of {noreply, State} -> diff --git a/src/bel_scan_eel_eng.erl b/src/bel_scan_eel_eng.erl index 8f2dade..18cdac4 100644 --- a/src/bel_scan_eel_eng.erl +++ b/src/bel_scan_eel_eng.erl @@ -3,7 +3,7 @@ % bel_scan_eng callbacks -export([ init/1 - , handle_start/1 + , handle_start/2 , handle_text/2 , handle_match/2 , handle_terminate/1 @@ -52,7 +52,7 @@ init(_Opts) -> ] }. -handle_start(State) -> +handle_start(_Bin, State) -> {noreply, State}. handle_text(_Text, State) -> diff --git a/src/bel_scan_eng.erl b/src/bel_scan_eng.erl index 9c8ecb6..f3aab01 100644 --- a/src/bel_scan_eng.erl +++ b/src/bel_scan_eng.erl @@ -17,10 +17,13 @@ , EngState :: state() . --callback handle_start(Scan0) -> Scan - when Scan0 :: scan() - , Scan :: scan() - . +-callback handle_start(Bin, Scan) -> Return + when Bin :: binary() + , Scan :: scan() + , Return :: {noreply, scan()} + | {reply, binary(), scan()} + | {halt, scan()} + . -callback handle_text(Text, Scan) -> Return when Text :: binary() From abd486efb45b61ac6320172292e0702be2a15c9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Fri, 19 Apr 2024 02:18:02 -0300 Subject: [PATCH 07/25] feat: implement handle_terminate callback --- src/bel_scan.erl | 45 +++++++++++++++++++++++++++++----------- src/bel_scan_eel_eng.erl | 4 ++-- src/bel_scan_eng.erl | 9 ++++---- 3 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 09d41cf..f81ce86 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -83,11 +83,11 @@ anno(Metadata, _State) -> {start_loc, end_loc, Metadata}. 
% TODO -clear_text(State) -> - State. +clear_text(#state{bpart = BPart} = State) -> + State#state{bpart = bel_scan_bpart:set_len(0, BPart)}. push_token(Token, #state{tokens = Tokens} = State) -> - State#state{tokens = [Token | Tokens]}. + State#state{tokens = Tokens ++ [Token]}. push_tokens(Tokens, State) when is_list(Tokens) -> lists:foldl(fun push_token/2, State, Tokens). @@ -137,19 +137,28 @@ continue({match, Match}, Rest, State0) -> State1 = case get_part(State0#state.bpart) of <<>> -> State0; - Text -> - handle_text(State0#state.engines, Text, State0) + Text0 -> + {Text, State2} = handle_text(State0#state.engines, Text0, State0), + fold(State2, [ + fun(S) -> push_token(text_token(Text, S), S) end, + fun(S) -> clear_text(S) end + ]) end, State = handle_match(State0#state.engines, Match, State1), continue(scan, Rest, State). -terminate(State) -> - case get_part(State#state.bpart) of +terminate(State0) -> + State = case get_part(State0#state.bpart) of <<>> -> - State; - Text -> - handle_text(State#state.engines, Text, State) - end. + State0; + Text0 -> + {Text, State1} = handle_text(State0#state.engines, Text0, State0), + fold(State1, [ + fun(S) -> push_token(text_token(Text, S), S) end, + fun(S) -> clear_text(S) end + ]) + end, + handle_terminate(State#state.engines, State#state.tokens, State). find_marker([{Mod, Eng} | Engs], Bin) -> Markers = bel_scan_eng:get_markers(Eng), @@ -196,7 +205,7 @@ handle_text([{Mod, _Eng} | Engs], Text0, State0) -> State end; handle_text([], Text, State) -> - push_token(text_token(Text, State), State). + {Text, State}. handle_match([{Mod, _Eng} | Engs], Match, State0) -> case Mod:handle_match(Match, State0) of @@ -210,6 +219,18 @@ handle_match([{Mod, _Eng} | Engs], Match, State0) -> handle_match([], _Match, State) -> State. 
+handle_terminate([{Mod, _Eng} | Engs], Tokens0, State0) -> + case Mod:handle_terminate(Tokens0, State0) of + {noreply, State} -> + handle_terminate(Engs, Tokens0, State); + {reply, Tokens, State} -> + handle_terminate(Engs, Tokens, State); + {halt, State} -> + State + end; +handle_terminate([], Tokens, State) -> + {Tokens, State}. + %%%===================================================================== %%% Tests %%%===================================================================== diff --git a/src/bel_scan_eel_eng.erl b/src/bel_scan_eel_eng.erl index 18cdac4..c572f85 100644 --- a/src/bel_scan_eel_eng.erl +++ b/src/bel_scan_eel_eng.erl @@ -6,7 +6,7 @@ , handle_start/2 , handle_text/2 , handle_match/2 - , handle_terminate/1 + , handle_terminate/2 ]). -include("bel_scan_eng.hrl"). @@ -65,5 +65,5 @@ handle_match({?MODULE, MarkerId, Captured}, State) -> handle_match(_Match, State) -> {noreply, State}. -handle_terminate(State) -> +handle_terminate(_Tokens, State) -> {noreply, State}. diff --git a/src/bel_scan_eng.erl b/src/bel_scan_eng.erl index f3aab01..c6bbaab 100644 --- a/src/bel_scan_eng.erl +++ b/src/bel_scan_eng.erl @@ -44,10 +44,11 @@ | {halt, scan()} . --callback handle_terminate(Scan0) -> Scan - when Scan0 :: scan() - , Scan :: scan() - . +-callback handle_terminate(Tokens, Scan0) -> Scan + when Tokens :: [token()] + , Scan0 :: scan() + , Scan :: scan() + . 
% Libs From 728cb7d4d88cf2ecffa069e2066240e1c8dc4b3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Fri, 19 Apr 2024 02:46:15 -0300 Subject: [PATCH 08/25] feat: update location after match --- src/bel_scan.erl | 14 ++++++----- src/bel_scan_loc.erl | 57 ++++++++++++++++++++++++++++--------------- src/bel_scan_mark.erl | 2 +- 3 files changed, 47 insertions(+), 26 deletions(-) diff --git a/src/bel_scan.erl b/src/bel_scan.erl index f81ce86..f63dac4 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -128,8 +128,9 @@ continue(scan, <>, State) -> end; continue(find_start_markers, <>, State) -> case find_marker(State#state.engines, Rest0) of - {match, {Match, Rest}} -> - continue({match, Match}, Rest, State); + {match, {Match, Loc0, Rest}} -> + Loc = bel_scan_loc:incr(Loc0, State#state.loc), + continue({match, Match}, Rest, State#state{loc = Loc}); nomatch -> continue(scan, Rest0, State) end; @@ -163,10 +164,11 @@ terminate(State0) -> find_marker([{Mod, Eng} | Engs], Bin) -> Markers = bel_scan_eng:get_markers(Eng), case do_find_marker(Markers, Bin) of - {match, {Marker, Groups, Rest}} -> + {match, {Marker, MatchText, Groups, Rest}} -> MarkerId = bel_scan_mark:get_id(Marker), Match = {Mod, MarkerId, Groups}, - {match, {Match, Rest}}; + Loc = bel_scan_loc:read(MatchText), + {match, {Match, Loc, Rest}}; nomatch -> find_marker(Engs, Bin) end; @@ -175,8 +177,8 @@ find_marker([], _) -> do_find_marker([Marker | Markers], Bin) -> case bel_scan_mark:re_match(Marker, Bin) of - {match, {Groups, Rest}} -> - {match, {Marker, Groups, Rest}}; + {match, {MatchText, Groups, Rest}} -> + {match, {Marker, MatchText, Groups, Rest}}; nomatch -> do_find_marker(Markers, Bin) end; diff --git a/src/bel_scan_loc.erl b/src/bel_scan_loc.erl index 5eb4450..8486d41 100644 --- a/src/bel_scan_loc.erl +++ b/src/bel_scan_loc.erl @@ -24,16 +24,19 @@ -export([ new/1 , read/1 , read/2 + , incr/2 + , incr_ln/2 + , incr_col/2 + , new_ln/1 + , to_tuple/1 , get_ln/1 , set_ln/2 , 
get_col/1 , set_col/2 + , get_first_ln/1 + , set_first_ln/2 , get_first_col/1 , set_first_col/2 - , incr_ln/2 - , incr_col/2 - , new_ln/1 - , to_tuple/1 ]). -export_type([ t/0 ]). @@ -45,7 +48,7 @@ -include_lib("eunit/include/eunit.hrl"). -endif. --record(loc, { ln, col, first_col }). +-record(loc, { ln, col, first_ln, first_col }). -opaque t() :: #loc{}. @@ -54,10 +57,12 @@ %%%===================================================================== new(Params) when is_map(Params) -> + FirstLn = maps:get(first_ln, Params, ?FIRST_LN), FirstCol = maps:get(first_col, Params, ?FIRST_COL), #loc{ - ln = maps:get(ln, Params, ?FIRST_LN), + ln = maps:get(ln, Params, FirstLn), col = maps:get(col, Params, FirstCol), + first_ln = FirstLn, first_col = FirstCol }. @@ -70,13 +75,33 @@ read(Bin, #loc{} = Loc) when is_binary(Bin) -> do_read(Bin, Loc) -> case bel_scan_read:bin(Bin) of {{new_ln, Incr}, Rest} -> - do_read(Rest, incr_ln(Incr, Loc)); + do_read(Rest, incr({Incr, Loc#loc.first_col}, Loc)); {{continue, Incr}, Rest} -> do_read(Rest, incr_col(Incr, Loc)); terminate -> Loc end. +% FIXME: I think it's not ok. +incr(#loc{ln = Ln, col = Col}, Loc) -> + incr({Ln, Col}, Loc); +incr({Ln, Col}, #loc{first_ln = Ln} = Loc) -> + incr_col(Col, Loc); +incr({Ln, Col}, Loc) -> + set_col(Col, incr_ln(Ln, Loc)). + +incr_ln(N, #loc{ln = Ln} = Loc) -> + Loc#loc{ln = Ln+N}. + +incr_col(N, #loc{col = Col} = Loc) -> + Loc#loc{col = Col+N}. + +new_ln(#loc{ln = Ln, first_col = FirstCol} = Loc) -> + Loc#loc{ln = Ln+1, col = FirstCol}. + +to_tuple(#loc{ln = Ln, col = Col}) -> + {Ln, Col}. + get_ln(#loc{ln = Ln}) -> Ln. @@ -89,24 +114,18 @@ get_col(#loc{col = Col}) -> set_col(Col, #loc{} = Loc) -> Loc#loc{col = Col}. +get_first_ln(#loc{first_ln = FirstLn}) -> + FirstLn. + +set_first_ln(FirstLn, #loc{} = Loc) -> + Loc#loc{first_ln = FirstLn}. + get_first_col(#loc{first_col = FirstCol}) -> FirstCol. set_first_col(FirstCol, #loc{} = Loc) -> Loc#loc{first_col = FirstCol}. 
-incr_ln(N, #loc{ln = Ln} = Loc) -> - Loc#loc{ln = Ln+N}. - -incr_col(N, #loc{col = Col} = Loc) -> - Loc#loc{col = Col+N}. - -new_ln(#loc{ln = Ln, first_col = FirstCol} = Loc) -> - Loc#loc{ln = Ln+1, col = FirstCol}. - -to_tuple(#loc{ln = Ln, col = Col}) -> - {Ln, Col}. - %%%===================================================================== %%% Tests %%%===================================================================== diff --git a/src/bel_scan_mark.erl b/src/bel_scan_mark.erl index c5be846..4795efe 100644 --- a/src/bel_scan_mark.erl +++ b/src/bel_scan_mark.erl @@ -39,7 +39,7 @@ re_match(#marker{re = RE}, Bin) -> case re:run(Bin, RE, [{capture, all, binary}]) of {match, [MatchText | Groups]} -> <<_:(byte_size(MatchText))/binary, Rest/binary>> = Bin, - {match, {Groups, Rest}}; + {match, {MatchText, Groups, Rest}}; nomatch -> nomatch end. From f5662e16a6fcd0f7cb81ed2ef6e133650f1ee62d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Fri, 19 Apr 2024 11:35:44 -0300 Subject: [PATCH 09/25] fix: position issues --- src/bel_scan.erl | 169 +++++++++++++++++------------- src/bel_scan_bpart.erl | 25 +++-- src/bel_scan_eel_eng.erl | 14 ++- src/bel_scan_eng.erl | 5 +- src/bel_scan_loc.erl | 50 ++++++--- test/bel_scan_SUITE.erl | 153 +++++++++++++++++++++++++++ test/support/support_scan_eng.erl | 72 +++++++++++++ 7 files changed, 390 insertions(+), 98 deletions(-) create mode 100644 test/bel_scan_SUITE.erl create mode 100644 test/support/support_scan_eng.erl diff --git a/src/bel_scan.erl b/src/bel_scan.erl index f63dac4..a6c8fa4 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -23,11 +23,14 @@ -compile(inline_list_funcs). -export([ new/1 - , bin/1 + , bin/2 , state/1 , fold/2 - , token/3 , text_token/2 + , text_token/3 + , token/3 + , token/4 + , token/5 , push_token/2 , push_tokens/2 ]). @@ -38,13 +41,13 @@ -import(bel_scan_bpart, [ incr_len/2, get_part/1 ]). -define(DEFAULT_OPTS, #{}). +-define(DEFAULT_META, undefined). -ifdef(TEST). 
-include_lib("eunit/include/eunit.hrl"). -endif. -% TODO: rename loc to start_loc and add end_loc --record(state, { engines, bpart, loc, tokens }). +-record(state, { engines, bpart, loc, prev_loc, tokens }). -opaque t() :: #state{}. -type input() :: binary(). @@ -54,17 +57,19 @@ %%%===================================================================== new(Params) when is_map(Params) -> + Loc = maps:get(loc, Params, bel_scan_loc:new(#{})), #state{ - engines = init_engines(maps:get(engines, Params, [bel_scan_eel_eng])), + engines = init_engines(maps:get(engines, Params)), bpart = maps:get(bpart, Params, bel_scan_bpart:new(#{ bin => maps:get(bin, Params, <<>>) })), - loc = maps:get(loc, Params, bel_scan_loc:new(#{})), + loc = Loc, + prev_loc = maps:get(prev_loc, Params, Loc), tokens = maps:get(tokens, Params, []) }. -bin(Bin) -> - start(Bin, new(#{})). +bin(Bin, Opts) -> + start(Bin, new(Opts)). state(#state{bpart = BPart} = State) -> start(bel_scan_bpart:get_bin(BPart), State). @@ -73,16 +78,25 @@ fold(#state{} = State, Funs) -> lists:foldl(fun(F, S) -> F(S) end, State, Funs). text_token(Text, State) -> - token(text, Text, State). + text_token(Text, State#state.prev_loc, State#state.loc). -token(Id, Value, State) -> - {Id, anno(undefined, State), Value}. +text_token(Text, InitLoc, EndLoc) -> + token(text, Text, InitLoc, EndLoc). -% TODO -anno(Metadata, _State) -> - {start_loc, end_loc, Metadata}. +token(Id, Value, {InitLoc, EndLoc}) -> + token(Id, Value, InitLoc, EndLoc, ?DEFAULT_META). + +token(Id, Value, InitLoc, EndLoc) -> + token(Id, Value, InitLoc, EndLoc, ?DEFAULT_META). + +token(Id, Value, InitLoc, EndLoc, Metadata) -> + {Id, anno(InitLoc, EndLoc, Metadata), Value}. + +anno(InitLoc0, EndLoc0, Metadata) -> + InitLoc = bel_scan_loc:to_tuple(InitLoc0), + EndLoc = bel_scan_loc:to_tuple(EndLoc0), + {InitLoc, EndLoc, Metadata}. -% TODO clear_text(#state{bpart = BPart} = State) -> State#state{bpart = bel_scan_bpart:set_len(0, BPart)}. 
@@ -105,9 +119,9 @@ init_engine({Mod, Opts}) when is_atom(Mod) -> {Mod, bel_scan_eng:compile(Mod:init(Opts))}. start(Bin0, State0) -> - {Bin, State} = handle_start(State0#state.engines, Bin0, State0), - BPart = bel_scan_bpart:set_bin(Bin, State#state.bpart), - continue(scan, Bin, State#state{bpart = BPart}). + State = handle_start(Bin0, State0), + Bin = bel_scan_bpart:get_bin(State#state.bpart), + continue(scan, Bin, State). continue(scan, <<>>, State) -> terminate(State); @@ -126,49 +140,38 @@ continue(scan, <>, State) -> terminate -> terminate(State) end; -continue(find_start_markers, <>, State) -> - case find_marker(State#state.engines, Rest0) of - {match, {Match, Loc0, Rest}} -> - Loc = bel_scan_loc:incr(Loc0, State#state.loc), - continue({match, Match}, Rest, State#state{loc = Loc}); +continue(find_start_markers, <>, State0) -> + case find_marker(State0#state.engines, Rest0) of + {match, {Mod, MarkerId, MatchText, Captured, Rest}} -> + State1 = handle_text(State0), + InitLoc = State1#state.loc, + MatchTextLoc = bel_scan_loc:read(MatchText), + EndLoc = bel_scan_loc:incr(MatchTextLoc, InitLoc), + Pos = bel_scan_loc:get_pos(MatchTextLoc) + bel_scan_loc:get_pos(InitLoc), + BPart = bel_scan_bpart:reset_pos(Pos, State1#state.bpart), + Match = {Mod, MarkerId, MatchText, Captured, {InitLoc, EndLoc}}, + continue({handle_match, Match}, Rest, State1#state{ + loc = EndLoc, + prev_loc = EndLoc, + bpart = BPart + }); nomatch -> - continue(scan, Rest0, State) + continue(scan, Rest0, State0) end; -continue({match, Match}, Rest, State0) -> - State1 = case get_part(State0#state.bpart) of - <<>> -> - State0; - Text0 -> - {Text, State2} = handle_text(State0#state.engines, Text0, State0), - fold(State2, [ - fun(S) -> push_token(text_token(Text, S), S) end, - fun(S) -> clear_text(S) end - ]) - end, - State = handle_match(State0#state.engines, Match, State1), +continue({handle_match, Match}, Rest, State0) -> + State = handle_match(Match, State0), continue(scan, Rest, State). 
terminate(State0) -> - State = case get_part(State0#state.bpart) of - <<>> -> - State0; - Text0 -> - {Text, State1} = handle_text(State0#state.engines, Text0, State0), - fold(State1, [ - fun(S) -> push_token(text_token(Text, S), S) end, - fun(S) -> clear_text(S) end - ]) - end, - handle_terminate(State#state.engines, State#state.tokens, State). + State = handle_text(State0), + handle_terminate(State). find_marker([{Mod, Eng} | Engs], Bin) -> Markers = bel_scan_eng:get_markers(Eng), case do_find_marker(Markers, Bin) of - {match, {Marker, MatchText, Groups, Rest}} -> + {match, {Marker, MatchText, Captured, Rest}} -> MarkerId = bel_scan_mark:get_id(Marker), - Match = {Mod, MarkerId, Groups}, - Loc = bel_scan_loc:read(MatchText), - {match, {Match, Loc, Rest}}; + {match, {Mod, MarkerId, MatchText, Captured, Rest}}; nomatch -> find_marker(Engs, Bin) end; @@ -177,61 +180,82 @@ find_marker([], _) -> do_find_marker([Marker | Markers], Bin) -> case bel_scan_mark:re_match(Marker, Bin) of - {match, {MatchText, Groups, Rest}} -> - {match, {Marker, MatchText, Groups, Rest}}; + {match, {MatchText, Captured, Rest}} -> + {match, {Marker, MatchText, Captured, Rest}}; nomatch -> do_find_marker(Markers, Bin) end; do_find_marker([], _) -> nomatch. -handle_start([{Mod, _Eng} | Engs], Bin0, State0) -> +handle_start(Bin, State) -> + do_handle_start(State#state.engines, Bin, State). + +do_handle_start([{Mod, _Eng} | Engs], Bin0, State0) -> case Mod:handle_start(Bin0, State0) of {noreply, State} -> - handle_start(Engs, Bin0, State); + do_handle_start(Engs, Bin0, State); {reply, Bin, State} -> - handle_start(Engs, Bin, State); + do_handle_start(Engs, Bin, State); {halt, State} -> State end; -handle_start([], Bin, State) -> - {Bin, State}. +do_handle_start([], Bin, State) -> + BPart = bel_scan_bpart:set_bin(Bin, State#state.bpart), + State#state{bpart = BPart}. + +handle_text(State) -> + handle_text(get_part(State#state.bpart), State). 
+ +handle_text(<<>>, State) -> + State; +handle_text(Text, State) -> + do_handle_text(State#state.engines, Text, State). -handle_text([{Mod, _Eng} | Engs], Text0, State0) -> +do_handle_text([{Mod, _Eng} | Engs], Text0, State0) -> case Mod:handle_text(Text0, State0) of {noreply, State} -> - handle_text(Engs, Text0, State); + do_handle_text(Engs, Text0, State); {reply, Text, State} -> - handle_text(Engs, Text, State); + do_handle_text(Engs, Text, State); {halt, State} -> State end; -handle_text([], Text, State) -> - {Text, State}. +do_handle_text([], Text, State) -> + fold(State, [ + fun(S) -> push_token(text_token(Text, S), S) end, + fun(S) -> clear_text(S) end + ]). -handle_match([{Mod, _Eng} | Engs], Match, State0) -> +handle_match(Match, State) -> + do_handle_match(State#state.engines, Match, State). + +do_handle_match([{Mod, _Eng} | Engs], Match, State0) -> case Mod:handle_match(Match, State0) of {noreply, State} -> - handle_match(Engs, Match, State); + do_handle_match(Engs, Match, State); {reply, Tokens, State} -> - handle_match(Engs, Match, push_tokens(Tokens, State)); + do_handle_match(Engs, Match, push_tokens(Tokens, State)); {halt, State} -> State end; -handle_match([], _Match, State) -> +do_handle_match([], _Match, State) -> State. -handle_terminate([{Mod, _Eng} | Engs], Tokens0, State0) -> +handle_terminate(State) -> + do_handle_terminate(State#state.engines, State#state.tokens, State). + +do_handle_terminate([{Mod, _Eng} | Engs], Tokens0, State0) -> case Mod:handle_terminate(Tokens0, State0) of {noreply, State} -> - handle_terminate(Engs, Tokens0, State); + do_handle_terminate(Engs, Tokens0, State); {reply, Tokens, State} -> - handle_terminate(Engs, Tokens, State); + do_handle_terminate(Engs, Tokens, State); {halt, State} -> State end; -handle_terminate([], Tokens, State) -> - {Tokens, State}. +do_handle_terminate([], Tokens, State) -> + State#state{tokens = Tokens}. 
%%%===================================================================== %%% Tests @@ -240,5 +264,6 @@ handle_terminate([], Tokens, State) -> -ifdef(TEST). -compile([export_all, nowarn_export_all]). +% TODO -endif. diff --git a/src/bel_scan_bpart.erl b/src/bel_scan_bpart.erl index d1559d4..f3a517b 100644 --- a/src/bel_scan_bpart.erl +++ b/src/bel_scan_bpart.erl @@ -22,6 +22,7 @@ -module(bel_scan_bpart). -export([ new/1 + , reset_pos/2 , get_bin/1 , set_bin/2 , get_pos/1 @@ -56,6 +57,21 @@ new(Params) when is_map(Params) -> len = maps:get(len, Params, ?INIT_LEN) }. +reset_pos(Pos, #bpart{} = BPart) -> + BPart#bpart{ + pos = Pos, + len = ?INIT_LEN + }. + +incr_pos(N, #bpart{pos = Pos} = BPart) -> + BPart#bpart{pos = Pos+N}. + +incr_len(N, #bpart{len = Len} = BPart) -> + BPart#bpart{len = Len+N}. + +get_part(#bpart{bin = Bin} = BPart) -> + binary:part(Bin, BPart#bpart.pos, BPart#bpart.len). + get_bin(#bpart{bin = X}) -> X. @@ -74,15 +90,6 @@ get_len(#bpart{len = X}) -> set_len(X, #bpart{} = BPart) -> BPart#bpart{len = X}. -incr_pos(N, #bpart{pos = Pos} = BPart) -> - BPart#bpart{pos = Pos+N}. - -incr_len(N, #bpart{len = Len} = BPart) -> - BPart#bpart{len = Len+N}. - -get_part(#bpart{bin = Bin} = BPart) -> - binary:part(Bin, BPart#bpart.pos, BPart#bpart.len). - %%%===================================================================== %%% Tests %%%===================================================================== diff --git a/src/bel_scan_eel_eng.erl b/src/bel_scan_eel_eng.erl index c572f85..39e56fe 100644 --- a/src/bel_scan_eel_eng.erl +++ b/src/bel_scan_eel_eng.erl @@ -11,6 +11,10 @@ -include("bel_scan_eng.hrl"). +%%%===================================================================== +%%% bel_scan_eng callbacks +%%%===================================================================== + init(_Opts) -> #engine{ markers = [ @@ -58,12 +62,18 @@ handle_start(_Bin, State) -> handle_text(_Text, State) -> {noreply, State}. 
-handle_match({?MODULE, MarkerId, Captured}, State) -> +handle_match({?MODULE, MarkerId, _Text, Captured, EndLoc}, State0) -> [_StartMarker, Expr, _EndMarker] = Captured, - Token = bel_scan:token(MarkerId, Expr, State), + {Token, State} = bel_scan:token(MarkerId, Expr, EndLoc, undefined, State0), {reply, [Token], State}; handle_match(_Match, State) -> {noreply, State}. handle_terminate(_Tokens, State) -> {noreply, State}. + +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +% nothing here yet! diff --git a/src/bel_scan_eng.erl b/src/bel_scan_eng.erl index c6bbaab..830e98e 100644 --- a/src/bel_scan_eng.erl +++ b/src/bel_scan_eng.erl @@ -34,10 +34,12 @@ . -callback handle_match(Match, Scan) -> Return - when Match :: {MarkerMod, {MarkerId, Captured}} + when Match :: {MarkerMod, MarkerId, Text, Captured, EndLoc} , MarkerMod :: module() , MarkerId :: marker_id() + , Text :: binary() , Captured :: captured() + , EndLoc :: loc() , Scan :: scan() , Return :: {noreply, scan()} | {reply, [token()], scan()} @@ -57,6 +59,7 @@ -type scan() :: bel_scan:t(). -type marker_id() :: bel_scan_marker:id(). -type token() :: bel_scan_token:t(). +-type loc() :: bel_scan_loc:t(). -type opts() :: term(). -type state() :: term(). -type re_group() :: binary(). diff --git a/src/bel_scan_loc.erl b/src/bel_scan_loc.erl index 8486d41..7284596 100644 --- a/src/bel_scan_loc.erl +++ b/src/bel_scan_loc.erl @@ -25,10 +25,13 @@ , read/1 , read/2 , incr/2 + , incr_pos/2 , incr_ln/2 , incr_col/2 , new_ln/1 , to_tuple/1 + , get_pos/1 + , set_pos/2 , get_ln/1 , set_ln/2 , get_col/1 @@ -48,7 +51,7 @@ -include_lib("eunit/include/eunit.hrl"). -endif. --record(loc, { ln, col, first_ln, first_col }). +-record(loc, { pos, ln, col, first_ln, first_col }). -opaque t() :: #loc{}. 
@@ -60,6 +63,7 @@ new(Params) when is_map(Params) -> FirstLn = maps:get(first_ln, Params, ?FIRST_LN), FirstCol = maps:get(first_col, Params, ?FIRST_COL), #loc{ + pos = maps:get(len, Params, 0), ln = maps:get(ln, Params, FirstLn), col = maps:get(col, Params, FirstCol), first_ln = FirstLn, @@ -75,33 +79,51 @@ read(Bin, #loc{} = Loc) when is_binary(Bin) -> do_read(Bin, Loc) -> case bel_scan_read:bin(Bin) of {{new_ln, Incr}, Rest} -> - do_read(Rest, incr({Incr, Loc#loc.first_col}, Loc)); + do_read(Rest, new_ln(incr_col(Incr, Loc))); {{continue, Incr}, Rest} -> do_read(Rest, incr_col(Incr, Loc)); terminate -> Loc end. -% FIXME: I think it's not ok. incr(#loc{ln = Ln, col = Col}, Loc) -> - incr({Ln, Col}, Loc); + set_pos(Loc#loc.pos, incr({Ln, Col}, Loc)); incr({Ln, Col}, #loc{first_ln = Ln} = Loc) -> - incr_col(Col, Loc); -incr({Ln, Col}, Loc) -> - set_col(Col, incr_ln(Ln, Loc)). - -incr_ln(N, #loc{ln = Ln} = Loc) -> - Loc#loc{ln = Ln+N}. + incr_pos(Col - Loc#loc.first_col, + set_col(Col + Loc#loc.col, Loc)); +incr({Ln, Col}, #loc{} = Loc) -> + set_col(Col, set_ln(Ln, Loc)). + +incr_pos(N, #loc{pos = Pos} = Loc) -> + Loc#loc{pos = Pos+N}. + +incr_ln(N, #loc{ln = Ln, pos = Pos} = Loc) -> + Loc#loc{ + ln = Ln+N, + pos = Pos+N + }. -incr_col(N, #loc{col = Col} = Loc) -> - Loc#loc{col = Col+N}. +incr_col(N, #loc{col = Col, pos = Pos} = Loc) -> + Loc#loc{ + col = Col+N, + pos = Pos+N + }. -new_ln(#loc{ln = Ln, first_col = FirstCol} = Loc) -> - Loc#loc{ln = Ln+1, col = FirstCol}. +new_ln(#loc{ln = Ln, first_col = FirstCol, pos = Pos} = Loc) -> + Loc#loc{ + ln = Ln+1, + col = FirstCol + }. to_tuple(#loc{ln = Ln, col = Col}) -> {Ln, Col}. +get_pos(#loc{pos = Pos}) -> + Pos. + +set_pos(Pos, #loc{} = Loc) -> + Loc#loc{pos = Pos}. + get_ln(#loc{ln = Ln}) -> Ln. 
diff --git a/test/bel_scan_SUITE.erl b/test/bel_scan_SUITE.erl new file mode 100644 index 0000000..c87904e --- /dev/null +++ b/test/bel_scan_SUITE.erl @@ -0,0 +1,153 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc bel_scan tests. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_SUITE). + +% -include_lib("common_test/include/ct.hrl"). + +%% Callback functions +-export([ suite/0 + , all/0 + , init_per_suite/1 + , end_per_suite/1 + , init_per_testcase/2 + , end_per_testcase/2 + ]). + +%% Test cases +-export([ bin/1 ]). + +%%%===================================================================== +%%% Callback functions +%%%===================================================================== + +%%---------------------------------------------------------------------- +%% @doc Returns list of tuples to set default properties for the suite. +%% +%% @param Info List of key/value pairs. +%% +%% @end +%%---------------------------------------------------------------------- +-spec suite() -> Info when + Info :: [tuple()]. + +suite() -> + []. + +%%---------------------------------------------------------------------- +%% @doc Initialization before the suite. 
+%% +%% @param Config A list of key/value pairs, holding the test case configuration. +%% +%% @end +%%---------------------------------------------------------------------- +-spec init_per_suite(Config0) -> Config when + Config0 :: [tuple()], + Config :: [tuple()]. + +init_per_suite(Config) -> + Config. + +%%---------------------------------------------------------------------- +%% @doc Cleanup after the suite. +%% +%% @param Config A list of key/value pairs, holding the test case configuration. +%% +%% @end +%%---------------------------------------------------------------------- +-spec end_per_suite(Config) -> Result when + Config :: [tuple()], + Result :: term(). + +end_per_suite(_Config) -> + ok. + +%%---------------------------------------------------------------------- +%% @doc Initialization before each test case. +%% +%% @param TestCase Name of the test case that is about to run. +%% @param Config A list of key/value pairs, holding the test case configuration. +%% +%% @end +%%---------------------------------------------------------------------- +-spec init_per_testcase(TestCase, Config0) -> Config when + TestCase :: atom(), + Config0 :: [tuple()], + Config :: [tuple()]. + +init_per_testcase(_TestCase, Config) -> + Config. + +%%---------------------------------------------------------------------- +%% @doc Cleanup after each test case. +%% +%% @param TestCase Name of the test case that is finished. +%% @param Config A list of key/value pairs, holding the test case configuration. +%% +%% @end +%%---------------------------------------------------------------------- +-spec end_per_testcase(TestCase, Config) -> Result when + TestCase :: atom(), + Config :: [tuple()], + Result :: term(). + +end_per_testcase(_TestCase, _Config) -> + ok. + +%%---------------------------------------------------------------------- +%% @doc Returns the list of groups and test cases that are to be executed. +%% +%% @param GroupName Name of a test case group. 
+%% @param TestCase Name of a test case. +%% +%% @end +%%---------------------------------------------------------------------- +-spec all() -> GroupsAndTestCases when + GroupsAndTestCases :: [Group | TestCase], + Group :: {group, GroupName}, + GroupName :: atom(), + TestCase :: atom(). + +all() -> + [ bin ]. + +%%%===================================================================== +%%% Test cases +%%%===================================================================== + +% foo {{ {{A, b}, {0, "C"}} }} bar +bin(Config) when is_list(Config) -> + Expect = error, + Bin = << +"foo {{ {{A, b}, + {0, \"C\"}} }} + bar" + >>, + Expect = bel_scan:bin(Bin, #{ + engines => [support_scan_eng] + }), + ok. + +%%%===================================================================== +%%% Support functions +%%%===================================================================== + +% nothing here yet! diff --git a/test/support/support_scan_eng.erl b/test/support/support_scan_eng.erl new file mode 100644 index 0000000..923a322 --- /dev/null +++ b/test/support/support_scan_eng.erl @@ -0,0 +1,72 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc Support engine. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. 
+%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(support_scan_eng). +-behaviour(bel_scan_eng). + +% bel_scan_eng callbacks +-export([ init/1 + , handle_start/2 + , handle_text/2 + , handle_match/2 + , handle_terminate/2 + ]). + +-include("bel_scan_eng.hrl"). + +%%%===================================================================== +%%% bel_scan_eng callbacks +%%%===================================================================== + +init(_Opts) -> + #engine{ + markers = [ + #marker{ + id = expr, + re = << + "({{\\s*)(.*[^}]+[^{]+[^\\s])(\\s*}})" "|" + "({{\\s*)(.*[^\\s])(\\s*}})" + >> + } + ] + }. + +handle_start(_Bin, State) -> + {noreply, State}. + +handle_text(_Text, State) -> + {noreply, State}. + +handle_match({?MODULE, expr, _Text, Captured, Loc}, State) -> + [_SMarker, Expr, _EMarker] = Captured, + Token = bel_scan:token(expr, Expr, Loc), + {reply, [Token], State}; +handle_match(_Match, State) -> + {noreply, State}. + +handle_terminate(_Tokens, State) -> + {noreply, State}. + +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +% nothing here yet! 
From 97f34273d8a7cc9dfe1f6c5ca353e3fef8181f7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Mon, 22 Apr 2024 23:58:49 -0300 Subject: [PATCH 10/25] feat: small fixes --- src/bel_scan.erl | 71 +++++++++++++++++-- src/bel_scan_anno.erl | 18 ++--- src/bel_scan_bpart.erl | 29 ++++---- src/bel_scan_eng.erl | 28 ++++++-- ..._scan_eel_eng.erl => bel_scan_eng_eel.erl} | 10 +-- src/bel_scan_loc.erl | 11 ++- src/bel_scan_mark.erl | 2 +- src/bel_scan_token.erl | 5 +- test/bel_scan_SUITE.erl | 24 +++++-- 9 files changed, 149 insertions(+), 49 deletions(-) rename src/{bel_scan_eel_eng.erl => bel_scan_eng_eel.erl} (88%) diff --git a/src/bel_scan.erl b/src/bel_scan.erl index a6c8fa4..4911f27 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -22,6 +22,7 @@ -module(bel_scan). -compile(inline_list_funcs). +% API -export([ new/1 , bin/2 , state/1 @@ -35,7 +36,25 @@ , push_tokens/2 ]). --export_type([ t/0, input/0 ]). +% State getters and setters functions +-export([ get_engines/1 + , set_engines/2 + , get_bpart/1 + , set_bpart/2 + , get_loc/1 + , set_loc/2 + , get_prev_loc/1 + , set_prev_loc/2 + , get_tokens/1 + , set_tokens/2 + ]). + +-export_type([ t/0 + , engine/0 + , bpart/0 + , loc/0 + , token/0 + ]). -import(bel_scan_loc, [ new_ln/1, incr_col/2 ]). -import(bel_scan_bpart, [ incr_len/2, get_part/1 ]). @@ -47,10 +66,18 @@ -include_lib("eunit/include/eunit.hrl"). -endif. --record(state, { engines, bpart, loc, prev_loc, tokens }). +-record(state, { engines :: [engine()] + , bpart :: bpart() + , loc :: loc() + , prev_loc :: loc() + , tokens :: [token()] + }). --opaque t() :: #state{}. --type input() :: binary(). +-opaque t() :: #state{}. +-type engine() :: bel_scan_eng:t(). +-type bpart() :: bel_scan_bpart:t(). +-type loc() :: bel_scan_loc:t(). +-type token() :: bel_scan_token:t(). 
%%%===================================================================== %%% API @@ -68,7 +95,7 @@ new(Params) when is_map(Params) -> tokens = maps:get(tokens, Params, []) }. -bin(Bin, Opts) -> +bin(Bin, Opts) when is_binary(Bin) -> start(Bin, new(Opts)). state(#state{bpart = BPart} = State) -> @@ -106,6 +133,40 @@ push_token(Token, #state{tokens = Tokens} = State) -> push_tokens(Tokens, State) when is_list(Tokens) -> lists:foldl(fun push_token/2, State, Tokens). +%%%===================================================================== +%%% State getters and setters functions +%%%===================================================================== + +get_engines(#state{engines = Engines}) -> + Engines. + +set_engines(Engines, #state{} = State) -> + State#state{engines = Engines}. + +get_bpart(#state{bpart = BPart}) -> + BPart. + +set_bpart(BPart, #state{} = State) -> + State#state{bpart = BPart}. + +get_loc(#state{loc = Loc}) -> + Loc. + +set_loc(Loc, #state{} = State) -> + State#state{loc = Loc}. + +get_prev_loc(#state{prev_loc = PrevLoc}) -> + PrevLoc. + +set_prev_loc(PrevLoc, #state{} = State) -> + State#state{prev_loc = PrevLoc}. + +get_tokens(#state{tokens = Tokens}) -> + Tokens. + +set_tokens(Tokens, #state{} = State) -> + State#state{tokens = Tokens}. + %%%===================================================================== %%% Internal functions %%%===================================================================== diff --git a/src/bel_scan_anno.erl b/src/bel_scan_anno.erl index 66e6c89..e95a277 100644 --- a/src/bel_scan_anno.erl +++ b/src/bel_scan_anno.erl @@ -1,13 +1,13 @@ -module(bel_scan_anno). --export_type([ t/0, loc/0, text/0, meta/0 ]). +-export_type([ t/0, loc/0, meta/0 ]). --record(anno, { - loc :: loc(), - text :: binary(), - meta :: term() -}). --opaque t() :: #anno{}. --type loc() :: bel_loc:t(). --type text() :: binary(). +% -record(anno, { + % init_loc :: loc(), + % end_loc :: loc(), + % meta :: term() +% }). +% -opaque t() :: #anno{}. 
+-type t() :: {loc(), loc(), meta()}. +-type loc() :: bel_scan_loc:t(). -type meta() :: term(). diff --git a/src/bel_scan_bpart.erl b/src/bel_scan_bpart.erl index f3a517b..8c7b02e 100644 --- a/src/bel_scan_bpart.erl +++ b/src/bel_scan_bpart.erl @@ -43,7 +43,10 @@ -include_lib("eunit/include/eunit.hrl"). -endif. --record(bpart, { bin, pos, len }). +-record(bpart, { bin :: binary() + , pos :: non_neg_integer() + , len :: non_neg_integer() + }). -opaque t() :: #bpart{}. %%%===================================================================== @@ -72,23 +75,23 @@ incr_len(N, #bpart{len = Len} = BPart) -> get_part(#bpart{bin = Bin} = BPart) -> binary:part(Bin, BPart#bpart.pos, BPart#bpart.len). -get_bin(#bpart{bin = X}) -> - X. +get_bin(#bpart{bin = Bin}) -> + Bin. -set_bin(X, #bpart{} = BPart) -> - BPart#bpart{bin = X}. +set_bin(Bin, #bpart{} = BPart) -> + BPart#bpart{bin = Bin}. -get_pos(#bpart{pos = X}) -> - X. +get_pos(#bpart{pos = Pos}) -> + Pos. -set_pos(X, #bpart{} = BPart) -> - BPart#bpart{pos = X}. +set_pos(Pos, #bpart{} = BPart) -> + BPart#bpart{pos = Pos}. -get_len(#bpart{len = X}) -> - X. +get_len(#bpart{len = Len}) -> + Len. -set_len(X, #bpart{} = BPart) -> - BPart#bpart{len = X}. +set_len(Len, #bpart{} = BPart) -> + BPart#bpart{len = Len}. %%%===================================================================== %%% Tests diff --git a/src/bel_scan_eng.erl b/src/bel_scan_eng.erl index 830e98e..3f4a105 100644 --- a/src/bel_scan_eng.erl +++ b/src/bel_scan_eng.erl @@ -10,11 +10,22 @@ , set_state/2 ]). +-export_type([ t/0 + , scan/0 + , marker_id/0 + , token/0 + , loc/0 + , opts/0 + , state/0 + , re_group/0 + , captured/0 + ]). + % Callbacks --callback init(Opts) -> EngState +-callback init(Opts) -> Engine when Opts :: opts() - , EngState :: state() + , Engine :: t() . -callback handle_start(Bin, Scan) -> Return @@ -34,11 +45,13 @@ . 
-callback handle_match(Match, Scan) -> Return - when Match :: {MarkerMod, MarkerId, Text, Captured, EndLoc} + when Match :: {MarkerMod, MarkerId, Text, Captured, Loc} , MarkerMod :: module() , MarkerId :: marker_id() , Text :: binary() , Captured :: captured() + , Loc :: {InitLoc, EndLoc} + , InitLoc :: loc() , EndLoc :: loc() , Scan :: scan() , Return :: {noreply, scan()} @@ -46,18 +59,21 @@ | {halt, scan()} . --callback handle_terminate(Tokens, Scan0) -> Scan +-callback handle_terminate(Tokens, Scan0) -> Return when Tokens :: [token()] , Scan0 :: scan() - , Scan :: scan() + , Return :: {noreply, scan()} + | {reply, [token()], scan()} + | {halt, scan()} . % Libs -include("bel_scan_eng.hrl"). +-opaque t() :: #engine{}. -type scan() :: bel_scan:t(). --type marker_id() :: bel_scan_marker:id(). +-type marker_id() :: bel_scan_mark:id(). -type token() :: bel_scan_token:t(). -type loc() :: bel_scan_loc:t(). -type opts() :: term(). diff --git a/src/bel_scan_eel_eng.erl b/src/bel_scan_eng_eel.erl similarity index 88% rename from src/bel_scan_eel_eng.erl rename to src/bel_scan_eng_eel.erl index 39e56fe..431cc5b 100644 --- a/src/bel_scan_eel_eng.erl +++ b/src/bel_scan_eng_eel.erl @@ -1,4 +1,4 @@ --module(bel_scan_eel_eng). +-module(bel_scan_eng_eel). -behaviour(bel_scan_eng). % bel_scan_eng callbacks @@ -31,7 +31,7 @@ init(_Opts) -> "(<%=\\s+)(.*)?(\\s+%>)" "|" "(<%=\\s+)(.*)?(^(\\s*%>))" >> - } + }, #marker{ id = continue, re = << @@ -62,9 +62,9 @@ handle_start(_Bin, State) -> handle_text(_Text, State) -> {noreply, State}. -handle_match({?MODULE, MarkerId, _Text, Captured, EndLoc}, State0) -> - [_StartMarker, Expr, _EndMarker] = Captured, - {Token, State} = bel_scan:token(MarkerId, Expr, EndLoc, undefined, State0), +handle_match({?MODULE, MarkerId, _Text, Captured, Loc}, State) -> + [_SMarker, Expr, _EMarker] = Captured, + Token = bel_scan:token(MarkerId, Expr, Loc), {reply, [Token], State}; handle_match(_Match, State) -> {noreply, State}. 
diff --git a/src/bel_scan_loc.erl b/src/bel_scan_loc.erl index 7284596..35b5859 100644 --- a/src/bel_scan_loc.erl +++ b/src/bel_scan_loc.erl @@ -44,6 +44,7 @@ -export_type([ t/0 ]). +-define(FIRST_POS, 0). -define(FIRST_LN, 1). -define(FIRST_COL, 1). @@ -63,7 +64,7 @@ new(Params) when is_map(Params) -> FirstLn = maps:get(first_ln, Params, ?FIRST_LN), FirstCol = maps:get(first_col, Params, ?FIRST_COL), #loc{ - pos = maps:get(len, Params, 0), + pos = maps:get(pos, Params, ?FIRST_POS), ln = maps:get(ln, Params, FirstLn), col = maps:get(col, Params, FirstCol), first_ln = FirstLn, @@ -109,7 +110,7 @@ incr_col(N, #loc{col = Col, pos = Pos} = Loc) -> pos = Pos+N }. -new_ln(#loc{ln = Ln, first_col = FirstCol, pos = Pos} = Loc) -> +new_ln(#loc{ln = Ln, first_col = FirstCol} = Loc) -> Loc#loc{ ln = Ln+1, col = FirstCol @@ -158,19 +159,25 @@ set_first_col(FirstCol, #loc{} = Loc) -> new_test() -> [ { "Should have default values" , ?assertEqual(#loc{ + pos = ?FIRST_POS, ln = ?FIRST_LN, col = ?FIRST_COL, + first_ln = ?FIRST_LN, first_col = ?FIRST_COL }, new(#{})) } , { "Should have params values" , ?assertEqual(#loc{ + pos = 6, ln = 6, col = 6, + first_ln = 6, first_col = 6 }, new(#{ + pos => 6, ln => 6, col => 6, + first_ln => 6, first_col => 6 })) } diff --git a/src/bel_scan_mark.erl b/src/bel_scan_mark.erl index 4795efe..5862d96 100644 --- a/src/bel_scan_mark.erl +++ b/src/bel_scan_mark.erl @@ -20,7 +20,7 @@ -opaque t() :: #marker{}. -type id() :: term(). --type re() :: binary() | re:mp(). +-type re() :: binary() | {re_pattern, _, _, _, _}. % re:mp/0 isn't exported. compile(#marker{} = Marker) -> case compile_re(Marker#marker.re) of diff --git a/src/bel_scan_token.erl b/src/bel_scan_token.erl index 1102ba8..edc9aa2 100644 --- a/src/bel_scan_token.erl +++ b/src/bel_scan_token.erl @@ -18,9 +18,10 @@ value :: term() }). --opaque t() :: #token{}. +% -opaque t() :: #token{}. +-type t() :: {id(), anno(), value()}. -type id() :: term(). --type anno() :: bel_scan:t(). 
+-type anno() :: bel_scan_anno:t(). -type value() :: term(). new(Params) when is_map(Params) -> diff --git a/test/bel_scan_SUITE.erl b/test/bel_scan_SUITE.erl index c87904e..809914c 100644 --- a/test/bel_scan_SUITE.erl +++ b/test/bel_scan_SUITE.erl @@ -133,17 +133,29 @@ all() -> %%% Test cases %%%===================================================================== -% foo {{ {{A, b}, {0, "C"}} }} bar bin(Config) when is_list(Config) -> - Expect = error, - Bin = << + Opts = #{ + engines => [support_scan_eng] + }, + + SingleLnBin = <<"foo {{ {{A, b}, {0, \"C\"}} }} bar">>, + [ + {text,{{1,1},{1,5},undefined},<<"foo ">>}, + {expr,{{1,5},{1,30},undefined},<<"{{A, b}, {0, \"C\"}}">>}, + {text,{{1,30},{1,34},undefined},<<" bar">>} + ] = bel_scan:get_tokens(bel_scan:bin(SingleLnBin, Opts)), + + MultiLnBin = << "foo {{ {{A, b}, {0, \"C\"}} }} bar" >>, - Expect = bel_scan:bin(Bin, #{ - engines => [support_scan_eng] - }), + [ + {text,{{1,1},{1,5},undefined},<<"foo ">>}, + {expr,{{1,5},{2,14},undefined},<<"{{A, b},\n {0, \"C\"}}">>}, + {text,{{2,14},{3,5},undefined},<<"\n bar">>} + ] = bel_scan:get_tokens(bel_scan:bin(MultiLnBin, Opts)), + ok. 
%%%===================================================================== From f10e72c3b218b263ea1ff6af53500795ad6a0ba6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Tue, 23 Apr 2024 23:38:12 -0300 Subject: [PATCH 11/25] feat: html5 scanner --- src/bel_scan.erl | 40 ++++++----- src/bel_scan_anno.erl | 5 +- src/bel_scan_bpart.erl | 38 ++++++---- src/bel_scan_eng_eel.erl | 20 +++--- src/bel_scan_eng_html5.erl | 120 ++++++++++++++++++++++++++++++++ src/bel_scan_eng_html5_attr.erl | 57 +++++++++++++++ src/bel_scan_loc.erl | 13 ++-- src/bel_scan_token.erl | 2 +- 8 files changed, 246 insertions(+), 49 deletions(-) create mode 100644 src/bel_scan_eng_html5.erl create mode 100644 src/bel_scan_eng_html5_attr.erl diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 4911f27..e9b30fe 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -26,12 +26,11 @@ -export([ new/1 , bin/2 , state/1 + , state/2 , fold/2 , text_token/2 - , text_token/3 , token/3 , token/4 - , token/5 , push_token/2 , push_tokens/2 ]). @@ -101,31 +100,34 @@ bin(Bin, Opts) when is_binary(Bin) -> state(#state{bpart = BPart} = State) -> start(bel_scan_bpart:get_bin(BPart), State). +state(Bin, #state{bpart = BPart} = State) when is_binary(Bin) -> + start(Bin, State#state{ + bpart = bel_scan_bpart:set_bin(Bin, BPart) + }). + fold(#state{} = State, Funs) -> lists:foldl(fun(F, S) -> F(S) end, State, Funs). -text_token(Text, State) -> - text_token(Text, State#state.prev_loc, State#state.loc). - -text_token(Text, InitLoc, EndLoc) -> - token(text, Text, InitLoc, EndLoc). +text_token(Text, #state{} = State) -> + Loc = {State#state.prev_loc, State#state.loc}, + token(text, Text, ?DEFAULT_META, Loc); +text_token(Text, Loc) -> + token(text, Text, ?DEFAULT_META, Loc). -token(Id, Value, {InitLoc, EndLoc}) -> - token(Id, Value, InitLoc, EndLoc, ?DEFAULT_META). +token(Id, Value, Loc) -> + token(Id, Value, ?DEFAULT_META, Loc). 
-token(Id, Value, InitLoc, EndLoc) -> - token(Id, Value, InitLoc, EndLoc, ?DEFAULT_META). +token(Id, Value, Metadata, Loc) -> + {Id, anno(Loc, Metadata), Value}. -token(Id, Value, InitLoc, EndLoc, Metadata) -> - {Id, anno(InitLoc, EndLoc, Metadata), Value}. - -anno(InitLoc0, EndLoc0, Metadata) -> +anno({InitLoc0, EndLoc0}, Metadata) -> InitLoc = bel_scan_loc:to_tuple(InitLoc0), EndLoc = bel_scan_loc:to_tuple(EndLoc0), - {InitLoc, EndLoc, Metadata}. + {{InitLoc, EndLoc}, Metadata}. clear_text(#state{bpart = BPart} = State) -> - State#state{bpart = bel_scan_bpart:set_len(0, BPart)}. + Pos = bel_scan_loc:get_pos(State#state.loc), + State#state{bpart = bel_scan_bpart:reset_pos(Pos, BPart)}. push_token(Token, #state{tokens = Tokens} = State) -> State#state{tokens = Tokens ++ [Token]}. @@ -182,14 +184,14 @@ init_engine({Mod, Opts}) when is_atom(Mod) -> start(Bin0, State0) -> State = handle_start(Bin0, State0), Bin = bel_scan_bpart:get_bin(State#state.bpart), - continue(scan, Bin, State). + continue(find_start_markers, Bin, State). continue(scan, <<>>, State) -> terminate(State); continue(scan, <>, State) -> case bel_scan_read:bin(Rest0) of {{new_ln, Incr}, Rest} -> - continue(scan, Rest, fold(State, [ + continue(find_start_markers, Rest, fold(State, [ fun(S) -> S#state{loc = new_ln(S#state.loc)} end, fun(S) -> S#state{bpart = incr_len(Incr, S#state.bpart)} end ])); diff --git a/src/bel_scan_anno.erl b/src/bel_scan_anno.erl index e95a277..ea356fa 100644 --- a/src/bel_scan_anno.erl +++ b/src/bel_scan_anno.erl @@ -1,6 +1,6 @@ -module(bel_scan_anno). --export_type([ t/0, loc/0, meta/0 ]). +-export_type([ t/0, loc/0, text/0, meta/0 ]). % -record(anno, { % init_loc :: loc(), @@ -8,6 +8,7 @@ % meta :: term() % }). % -opaque t() :: #anno{}. --type t() :: {loc(), loc(), meta()}. +-type t() :: {loc(), loc(), text(), meta()}. -type loc() :: bel_scan_loc:t(). +-type text() :: binary(). -type meta() :: term(). 
diff --git a/src/bel_scan_bpart.erl b/src/bel_scan_bpart.erl index 8c7b02e..b06d4c2 100644 --- a/src/bel_scan_bpart.erl +++ b/src/bel_scan_bpart.erl @@ -23,15 +23,17 @@ -export([ new/1 , reset_pos/2 + , incr_pos/2 + , incr_len/2 + , get_part/1 , get_bin/1 , set_bin/2 , get_pos/1 , set_pos/2 , get_len/1 , set_len/2 - , incr_pos/2 - , incr_len/2 - , get_part/1 + , get_init_len/1 + , set_init_len/2 ]). -export_type([ t/0 ]). @@ -43,9 +45,10 @@ -include_lib("eunit/include/eunit.hrl"). -endif. --record(bpart, { bin :: binary() - , pos :: non_neg_integer() - , len :: non_neg_integer() +-record(bpart, { bin :: binary() + , pos :: non_neg_integer() + , len :: non_neg_integer() + , init_len :: non_neg_integer() }). -opaque t() :: #bpart{}. @@ -54,16 +57,18 @@ %%%===================================================================== new(Params) when is_map(Params) -> + InitLen = maps:get(init_len, Params, ?INIT_LEN), #bpart{ bin = maps:get(bin, Params), pos = maps:get(pos, Params, ?FIRST_POS), - len = maps:get(len, Params, ?INIT_LEN) + len = maps:get(len, Params, InitLen), + init_len = InitLen }. -reset_pos(Pos, #bpart{} = BPart) -> +reset_pos(Pos, #bpart{init_len = InitLen} = BPart) -> BPart#bpart{ pos = Pos, - len = ?INIT_LEN + len = InitLen }. incr_pos(N, #bpart{pos = Pos} = BPart) -> @@ -93,6 +98,12 @@ get_len(#bpart{len = Len}) -> set_len(Len, #bpart{} = BPart) -> BPart#bpart{len = Len}. +get_init_len(#bpart{init_len = InitLen}) -> + InitLen. + +set_init_len(InitLen, #bpart{} = BPart) -> + BPart#bpart{init_len = InitLen}. 
+ %%%===================================================================== %%% Tests %%%===================================================================== @@ -105,18 +116,21 @@ new_test() -> , ?assertEqual(#bpart{ bin = <<>>, pos = ?FIRST_POS, - len = ?INIT_LEN + len = ?INIT_LEN, + init_len = ?INIT_LEN }, new(#{bin => <<>>})) } , { "Should have params values" , ?assertEqual(#bpart{ bin = <<>>, pos = 6, - len = 6 + len = 6, + init_len = 6 }, new(#{ bin => <<>>, pos => 6, - len => 6 + len => 6, + init_len => 6 })) } ]. diff --git a/src/bel_scan_eng_eel.erl b/src/bel_scan_eng_eel.erl index 431cc5b..59ba34b 100644 --- a/src/bel_scan_eng_eel.erl +++ b/src/bel_scan_eng_eel.erl @@ -21,36 +21,36 @@ init(_Opts) -> #marker{ id = inline, re = << - "(<%=\\s+)(.*)?(\\s+\\.%>)" "|" - "(<%=\\s+)(.*)?(^(\\s*\\.%>))" + "(<%=\\s+)(.*?[^\\s]*)(\\s+\\.%>)" "|" + "(<%=\\s+)(.*?[^\\s]*)(^(\\s*\\.%>))" >> }, #marker{ id = start, re = << - "(<%=\\s+)(.*)?(\\s+%>)" "|" - "(<%=\\s+)(.*)?(^(\\s*%>))" + "(<%=\\s+)(.*?[^\\s]*)(\\s+%>)" "|" + "(<%=\\s+)(.*?[^\\s]*)(^(\\s*%>))" >> }, #marker{ id = continue, re = << - "(<%\\s+)(.*)?(\\s+%>)" "|" - "(<%\\s+)(.*)?(^(\\s*%>))" + "(<%\\s+)(.*?[^\\s]*)(\\s+%>)" "|" + "(<%\\s+)(.*?[^\\s]*)(^(\\s*%>))" >> }, #marker{ id = terminate, re = << - "(<%\\s+)(.*)?(\\s+\\.%>)" "|" - "(<%\\s+)(.*)?(^(\\s*\\.%>))" + "(<%\\s+)(.*?[^\\s]*)(\\s+\\.%>)" "|" + "(<%\\s+)(.*?[^\\s]*)(^(\\s*\\.%>))" >> }, #marker{ id = comment, re = << - "(<%!--\\s+)(.*)?(\\s+--%>)" "|" - "(<%!--\\s+)(.*)?(^(\\s*--%>))" + "(<%!--\\s+)(.*?[^\\s]*)(\\s+--%>)" "|" + "(<%!--\\s+)(.*?[^\\s]*)(^(\\s*--%>))" >> } ] diff --git a/src/bel_scan_eng_html5.erl b/src/bel_scan_eng_html5.erl new file mode 100644 index 0000000..78a0082 --- /dev/null +++ b/src/bel_scan_eng_html5.erl @@ -0,0 +1,120 @@ +-module(bel_scan_eng_html5). +-behaviour(bel_scan_eng). + +% bel_scan_eng callbacks +-export([ init/1 + , handle_start/2 + , handle_text/2 + , handle_match/2 + , handle_terminate/2 + ]). 
+ +-include("bel_scan_eng.hrl"). + +-define(DOCTYPE, "!(?:(?i)DOCTYPE)"). + +-define(SPECIAL_TAG, "(script|style|textarea|title)"). + +-define(VOID_TAG, + "(area|base|br|col|embed|hr|img|input|" + "link|meta|param|source|track|wbr)" +). + +-define(ELEM_TAG, "(\\w+)"). + +-define(OPEN_TAG, "<"). + +-define(CLOSE_TAG, ">"). + +-define(CLOSE_VOID, "\\/?>"). + +-define(CLOSING_TAG, "<\\/(?1)\\s*>"). + +-define(ATTRS, "\\s*(.*?[^\\s]?)\\s*"). + +-define(CONTENT, ?ATTRS). + +-define(CHILD_NODES, "\\s*((?:(?R)|(?:(?!<\\/?(?1)).))*)"). + +%%%===================================================================== +%%% bel_scan_eng callbacks +%%%===================================================================== + +init(_Opts) -> + #engine{ + markers = [ + #marker{ + id = doctype, + re = << + ?OPEN_TAG ?DOCTYPE ?ATTRS ?CLOSE_TAG + >> + }, + #marker{ + id = special_tag, + re = << + ?OPEN_TAG ?SPECIAL_TAG ?ATTRS ?CLOSE_TAG + ?CONTENT + ?CLOSING_TAG + >> + }, + #marker{ + id = void_tag, + re = << + ?OPEN_TAG ?VOID_TAG ?ATTRS ?CLOSE_VOID + >> + }, + #marker{ + id = elem_tag, + re = <<"(?s)" + ?OPEN_TAG ?ELEM_TAG ?ATTRS ?CLOSE_TAG + ?CHILD_NODES + ?CLOSING_TAG + >> + } + ] + }. + +handle_start(_Bin, State) -> + {noreply, State}. + +handle_text(_Text, State) -> + {noreply, State}. 
+ +handle_match({?MODULE, doctype, _Text, Captured, Loc}, State) -> + [<<"html">>] = Captured, + Token = bel_scan:token(doctype, <<"html">>, Loc), + {reply, [Token], State}; +handle_match({?MODULE, special_tag, _Text, Captured, Loc}, State) -> + [Tag, Attrs, Content] = Captured, + Metadata = {attributes(Attrs), Content}, + Token = bel_scan:token(special_tag, Tag, Metadata, Loc), + {reply, [Token], State}; +handle_match({?MODULE, void_tag, _Text, Captured, Loc}, State) -> + [Tag, Attrs] = Captured, + Metadata = attributes(Attrs), + Token = bel_scan:token(void_tag, Tag, Metadata, Loc), + {reply, [Token], State}; +handle_match({?MODULE, elem_tag, _Text, Captured, Loc}, State) -> + [Tag, Attrs, ChildNodes] = Captured, + Metadata = {attributes(Attrs), child_nodes(ChildNodes)}, + Token = bel_scan:token(elem_tag, Tag, Metadata, Loc), + {reply, [Token], State}; +handle_match(_Match, State) -> + {noreply, State}. + +handle_terminate(_Tokens, State) -> + {noreply, State}. + +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +attributes(Bin) -> + bel_scan:get_tokens(bel_scan:bin(Bin, #{ + engines => [bel_scan_eng_html5_attr] + })). + +child_nodes(Bin) -> + bel_scan:get_tokens(bel_scan:bin(Bin, #{ + engines => [?MODULE] + })). diff --git a/src/bel_scan_eng_html5_attr.erl b/src/bel_scan_eng_html5_attr.erl new file mode 100644 index 0000000..a0db1e2 --- /dev/null +++ b/src/bel_scan_eng_html5_attr.erl @@ -0,0 +1,57 @@ +-module(bel_scan_eng_html5_attr). +-behaviour(bel_scan_eng). + +% bel_scan_eng callbacks +-export([ init/1 + , handle_start/2 + , handle_text/2 + , handle_match/2 + , handle_terminate/2 + ]). + +-include("bel_scan_eng.hrl"). 
+ +%%%===================================================================== +%%% bel_scan_eng callbacks +%%%===================================================================== + +init(_Opts) -> + #engine{ + markers = [ + #marker{ + id = attribute, + re = << + "(\\w+)=\\\"(.*?[^\\\\\"])\\\"" "|" + "(\\w+)='(.*?[^\\\\'])'" "|" + "(\\w+)" + >> + } + ] + }. + +handle_start(_Bin, State) -> + {noreply, State}. + +handle_text(_Text, State) -> + {halt, State}. + +handle_match({?MODULE, attribute, Text, [<<>>, <<>>, <<>>, <<>>, K], Loc}, State) -> + Token = bel_scan:token(attribute, Text, K, Loc), + {reply, [Token], State}; +handle_match({?MODULE, attribute, Text, [<<>>, <<>>, K, V], Loc}, State) -> + Token = bel_scan:token(attribute, Text, {K, V}, Loc), + {reply, [Token], State}; +handle_match({?MODULE, attribute, Text, [K, V], Loc}, State) -> + Token = bel_scan:token(attribute, Text, {K, V}, Loc), + {reply, [Token], State}; +handle_match(_Match, State) -> + {noreply, State}. + +handle_terminate(_Tokens, State) -> + {noreply, State}. + +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +% nothing here yet! diff --git a/src/bel_scan_loc.erl b/src/bel_scan_loc.erl index 35b5859..2df04c7 100644 --- a/src/bel_scan_loc.erl +++ b/src/bel_scan_loc.erl @@ -88,12 +88,14 @@ do_read(Bin, Loc) -> end. incr(#loc{ln = Ln, col = Col}, Loc) -> - set_pos(Loc#loc.pos, incr({Ln, Col}, Loc)); + incr({Ln, Col}, Loc); incr({Ln, Col}, #loc{first_ln = Ln} = Loc) -> incr_pos(Col - Loc#loc.first_col, - set_col(Col + Loc#loc.col, Loc)); + set_col(Col - Loc#loc.first_col + Loc#loc.col, Loc)); incr({Ln, Col}, #loc{} = Loc) -> - set_col(Col, set_ln(Ln, Loc)). + set_pos(Loc#loc.pos, + set_col(Col, + set_ln(Ln, Loc))). incr_pos(N, #loc{pos = Pos} = Loc) -> Loc#loc{pos = Pos+N}. @@ -110,10 +112,11 @@ incr_col(N, #loc{col = Col, pos = Pos} = Loc) -> pos = Pos+N }. 
-new_ln(#loc{ln = Ln, first_col = FirstCol} = Loc) -> +new_ln(#loc{ln = Ln, first_col = FirstCol, pos = Pos} = Loc) -> Loc#loc{ ln = Ln+1, - col = FirstCol + col = FirstCol, + pos = Pos+1 }. to_tuple(#loc{ln = Ln, col = Col}) -> diff --git a/src/bel_scan_token.erl b/src/bel_scan_token.erl index edc9aa2..338adb6 100644 --- a/src/bel_scan_token.erl +++ b/src/bel_scan_token.erl @@ -20,7 +20,7 @@ % -opaque t() :: #token{}. -type t() :: {id(), anno(), value()}. --type id() :: term(). +-type id() :: atom(). -type anno() :: bel_scan_anno:t(). -type value() :: term(). From 721e6d52887e0342f026d74464d621e431dfb538 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Wed, 24 Apr 2024 17:58:00 -0300 Subject: [PATCH 12/25] fix: text and position issues --- src/bel_scan.erl | 15 +-- src/bel_scan_eng_eel.erl | 2 +- src/bel_scan_eng_html5.erl | 169 +++++++++++++++++++++++++++----- src/bel_scan_eng_html5_attr.erl | 2 +- src/bel_scan_loc.erl | 12 ++- 5 files changed, 165 insertions(+), 35 deletions(-) diff --git a/src/bel_scan.erl b/src/bel_scan.erl index e9b30fe..7fcfc71 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -58,18 +58,19 @@ -import(bel_scan_loc, [ new_ln/1, incr_col/2 ]). -import(bel_scan_bpart, [ incr_len/2, get_part/1 ]). --define(DEFAULT_OPTS, #{}). --define(DEFAULT_META, undefined). - -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -endif. +-define(DEFAULT_OPTS, #{}). +-define(DEFAULT_META, undefined). + -record(state, { engines :: [engine()] , bpart :: bpart() , loc :: loc() , prev_loc :: loc() , tokens :: [token()] + , init_pos :: pos() }). -opaque t() :: #state{}. @@ -77,6 +78,7 @@ -type bpart() :: bel_scan_bpart:t(). -type loc() :: bel_scan_loc:t(). -type token() :: bel_scan_token:t(). +-type pos() :: bel_scan_loc:pos(). 
%%%===================================================================== %%% API @@ -91,7 +93,8 @@ new(Params) when is_map(Params) -> })), loc = Loc, prev_loc = maps:get(prev_loc, Params, Loc), - tokens = maps:get(tokens, Params, []) + tokens = maps:get(tokens, Params, []), + init_pos = maps:get(init_pos, Params, bel_scan_loc:get_pos(Loc)) }. bin(Bin, Opts) when is_binary(Bin) -> @@ -127,7 +130,7 @@ anno({InitLoc0, EndLoc0}, Metadata) -> clear_text(#state{bpart = BPart} = State) -> Pos = bel_scan_loc:get_pos(State#state.loc), - State#state{bpart = bel_scan_bpart:reset_pos(Pos, BPart)}. + State#state{bpart = bel_scan_bpart:reset_pos(Pos - State#state.init_pos, BPart)}. push_token(Token, #state{tokens = Tokens} = State) -> State#state{tokens = Tokens ++ [Token]}. @@ -211,7 +214,7 @@ continue(find_start_markers, <>, State0) -> MatchTextLoc = bel_scan_loc:read(MatchText), EndLoc = bel_scan_loc:incr(MatchTextLoc, InitLoc), Pos = bel_scan_loc:get_pos(MatchTextLoc) + bel_scan_loc:get_pos(InitLoc), - BPart = bel_scan_bpart:reset_pos(Pos, State1#state.bpart), + BPart = bel_scan_bpart:reset_pos(Pos - State1#state.init_pos, State1#state.bpart), Match = {Mod, MarkerId, MatchText, Captured, {InitLoc, EndLoc}}, continue({handle_match, Match}, Rest, State1#state{ loc = EndLoc, diff --git a/src/bel_scan_eng_eel.erl b/src/bel_scan_eng_eel.erl index 59ba34b..48a858e 100644 --- a/src/bel_scan_eng_eel.erl +++ b/src/bel_scan_eng_eel.erl @@ -66,7 +66,7 @@ handle_match({?MODULE, MarkerId, _Text, Captured, Loc}, State) -> [_SMarker, Expr, _EMarker] = Captured, Token = bel_scan:token(MarkerId, Expr, Loc), {reply, [Token], State}; -handle_match(_Match, State) -> +handle_match({Mod, _, _, _, _}, State) when Mod =/= ?MODULE -> {noreply, State}. handle_terminate(_Tokens, State) -> diff --git a/src/bel_scan_eng_html5.erl b/src/bel_scan_eng_html5.erl index 78a0082..17e9028 100644 --- a/src/bel_scan_eng_html5.erl +++ b/src/bel_scan_eng_html5.erl @@ -11,7 +11,9 @@ -include("bel_scan_eng.hrl"). 
--define(DOCTYPE, "!(?:(?i)DOCTYPE)"). +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. -define(SPECIAL_TAG, "(script|style|textarea|title)"). @@ -22,19 +24,19 @@ -define(ELEM_TAG, "(\\w+)"). --define(OPEN_TAG, "<"). +-define(OPEN_TAG, "(<)"). --define(CLOSE_TAG, ">"). +-define(CLOSE_TAG, "(>)"). --define(CLOSE_VOID, "\\/?>"). +-define(CLOSE_VOID, "(\\/?>)"). --define(CLOSING_TAG, "<\\/(?1)\\s*>"). +-define(CLOSING_TAG, "(<\\/(?2)>)"). --define(ATTRS, "\\s*(.*?[^\\s]?)\\s*"). +-define(ATTRS, "(.*?)"). --define(CONTENT, ?ATTRS). +-define(CONTENT, "(.*?)"). --define(CHILD_NODES, "\\s*((?:(?R)|(?:(?!<\\/?(?1)).))*)"). +-define(CHILD_NODES, "((?:(?R)|(?:(?!<\\/?(?2)).*?))*)"). %%%===================================================================== %%% bel_scan_eng callbacks @@ -45,13 +47,11 @@ init(_Opts) -> markers = [ #marker{ id = doctype, - re = << - ?OPEN_TAG ?DOCTYPE ?ATTRS ?CLOSE_TAG - >> + re = <<"">> }, #marker{ id = special_tag, - re = << + re = <<"(?s)" ?OPEN_TAG ?SPECIAL_TAG ?ATTRS ?CLOSE_TAG ?CONTENT ?CLOSING_TAG @@ -80,26 +80,28 @@ handle_start(_Bin, State) -> handle_text(_Text, State) -> {noreply, State}. 
-handle_match({?MODULE, doctype, _Text, Captured, Loc}, State) -> - [<<"html">>] = Captured, +handle_match({?MODULE, doctype, _Text, [], Loc}, State) -> Token = bel_scan:token(doctype, <<"html">>, Loc), {reply, [Token], State}; handle_match({?MODULE, special_tag, _Text, Captured, Loc}, State) -> - [Tag, Attrs, Content] = Captured, - Metadata = {attributes(Attrs), Content}, + [OAngB, Tag, Attrs, _CAngB, Content, _CTag] = Captured, + Metadata = {attributes(Attrs, [OAngB, Tag], Loc), Content}, Token = bel_scan:token(special_tag, Tag, Metadata, Loc), {reply, [Token], State}; handle_match({?MODULE, void_tag, _Text, Captured, Loc}, State) -> - [Tag, Attrs] = Captured, - Metadata = attributes(Attrs), + [OAngB, Tag, Attrs, _CAngB] = Captured, + Metadata = attributes(Attrs, [OAngB, Tag], Loc), Token = bel_scan:token(void_tag, Tag, Metadata, Loc), {reply, [Token], State}; handle_match({?MODULE, elem_tag, _Text, Captured, Loc}, State) -> - [Tag, Attrs, ChildNodes] = Captured, - Metadata = {attributes(Attrs), child_nodes(ChildNodes)}, + [OAngB, Tag, Attrs, CAngB, ChildNodes, _CTag] = Captured, + Metadata = { + attributes(Attrs, [OAngB, Tag], Loc), + child_nodes(ChildNodes, [OAngB, Tag, Attrs, CAngB], Loc, State) + }, Token = bel_scan:token(elem_tag, Tag, Metadata, Loc), {reply, [Token], State}; -handle_match(_Match, State) -> +handle_match({Mod, _, _, _, _}, State) when Mod =/= ?MODULE -> {noreply, State}. handle_terminate(_Tokens, State) -> @@ -109,12 +111,129 @@ handle_terminate(_Tokens, State) -> %%% Internal functions %%%===================================================================== -attributes(Bin) -> +attributes(Bin, PrevParts, Loc) -> bel_scan:get_tokens(bel_scan:bin(Bin, #{ - engines => [bel_scan_eng_html5_attr] + engines => [bel_scan_eng_html5_attr], + loc => init_loc(PrevParts, Loc) })). 
-child_nodes(Bin) -> +child_nodes(Bin, PrevParts, Loc, State) -> bel_scan:get_tokens(bel_scan:bin(Bin, #{ - engines => [?MODULE] + engines => bel_scan:get_engines(State), + loc => init_loc(PrevParts, Loc) })). + +init_loc(PrevParts, {InitLoc, _EndLoc}) -> + BLoc = bel_scan_loc:read(iolist_to_binary(PrevParts)), + bel_scan_loc:incr(BLoc, InitLoc). + +%%%===================================================================== +%%% Tests +%%%===================================================================== + +-ifdef(TEST). +-compile([export_all, nowarn_export_all]). + +% NOTE: Just a test, not intended to have a valid HTML syntax. + +-define(SLINE, << +" Lalala Some test bar" +>>). + +-define(MLINE, <<" + + ooooo +bar + + + + Title + + +">>). + +scan_(Bin) -> + bel_scan:get_tokens(bel_scan:bin(Bin, #{engines => [?MODULE]})). + +scan_test() -> + [ { "Should scan single line" + , ?assertEqual([ + {text,{{{1,1},{1,6}},undefined},<<" ">>}, + {doctype,{{{1,6},{1,21}},undefined},<<"html">>}, + {text,{{{1,21},{1,30}},undefined},<<" Lalala ">>}, + {void_tag, + {{{1,30},{1,47}}, + [{attribute,{{{1,37},{1,43}},<<"hidden">>},<<"hidden">>}]}, + <<"area">>}, + {text,{{{1,47},{1,48}},undefined},<<" ">>}, + {void_tag,{{{1,48},{1,57}},[]},<<"input">>}, + {text,{{{1,57},{1,58}},undefined},<<" ">>}, + {elem_tag, + {{{1,58},{1,129}}, + {[{attribute, + {{{1,63},{1,71}},{<<"id">>,<<"foo">>}}, + <<"id=\"foo\"">>}, + {attribute, + {{{1,72},{1,87}},{<<"title">>,<<"b\\'a\\'r">>}}, + <<"title='b\\'a\\'r'">>}, + {attribute,{{{1,91},{1,97}},<<"hidden">>},<<"hidden">>}], + [{text,{{{1,99},{1,100}},undefined},<<" ">>}, + {elem_tag, + {{{1,100},{1,122}}, + {[], + [{elem_tag, + {{{1,105},{1,115}}, + {[],[{text,{{{1,108},{1,111}},undefined},<<"vvv">>}]}}, + <<"b">>}, + {text,{{{1,115},{1,116}},undefined},<<" ">>}]}}, + <<"div">>}, + {text,{{{1,122},{1,123}},undefined},<<" ">>}]}}, + <<"div">>}, + {text,{{{1,129},{1,140}},undefined},<<" Some test ">>}, + {special_tag, + {{{1,140},{1,198}}, + 
{[{attribute,{{{1,147},{1,153}},<<"hidden">>},<<"hidden">>}, + {attribute, + {{{1,158},{1,166}},{<<"id">>,<<"foo">>}}, + <<"id='foo'">>}, + {attribute, + {{{1,168},{1,176}},<<"required">>}, + <<"required">>}], + <<"AAA">>}}, + <<"title">>}, + {text,{{{1,198},{1,202}},undefined},<<" bar">>} + ], scan_(?SLINE))} + , { "Should scan multiple lines" + , ?assertEqual([ + {text,{{{1,1},{2,1}},undefined},<<"\n">>}, + {special_tag, + {{{2,1},{5,10}},{[],<<"\n\n
Foo
\n\n">>}}, + <<"script">>}, + {text,{{{5,10},{6,1}},undefined},<<"\n">>}, + {elem_tag, + {{{6,1},{6,34}}, + {[], + [{text,{{{6,7},{6,10}},undefined},<<" ">>}, + {elem_tag, + {{{6,10},{6,26}}, + {[], + [{text, + {{{6,13},{6,22}},undefined}, + <<" ooooo ">>}]}}, + <<"i">>}, + {text,{{{6,26},{6,27}},undefined},<<" ">>}]}}, + <<"span">>}, + {text,{{{6,34},{9,1}},undefined},<<"ript>\n">>}, + {special_tag, + {{{9,1},{13,21}}, + {[],<<"\n\n Title\n\n ">>}}, + <<"title">>}, + {text,{{{13,21},{14,1}},undefined},<<"t">>} + ], scan_(?MLINE))} + ]. + +-endif. diff --git a/src/bel_scan_eng_html5_attr.erl b/src/bel_scan_eng_html5_attr.erl index a0db1e2..6e61766 100644 --- a/src/bel_scan_eng_html5_attr.erl +++ b/src/bel_scan_eng_html5_attr.erl @@ -44,7 +44,7 @@ handle_match({?MODULE, attribute, Text, [<<>>, <<>>, K, V], Loc}, State) -> handle_match({?MODULE, attribute, Text, [K, V], Loc}, State) -> Token = bel_scan:token(attribute, Text, {K, V}, Loc), {reply, [Token], State}; -handle_match(_Match, State) -> +handle_match({Mod, _, _, _, _}, State) when Mod =/= ?MODULE -> {noreply, State}. handle_terminate(_Tokens, State) -> diff --git a/src/bel_scan_loc.erl b/src/bel_scan_loc.erl index 2df04c7..a3ac7ce 100644 --- a/src/bel_scan_loc.erl +++ b/src/bel_scan_loc.erl @@ -42,7 +42,7 @@ , set_first_col/2 ]). --export_type([ t/0 ]). +-export_type([ t/0, pos/0 ]). -define(FIRST_POS, 0). -define(FIRST_LN, 1). @@ -55,6 +55,7 @@ -record(loc, { pos, ln, col, first_ln, first_col }). -opaque t() :: #loc{}. +-type pos() :: non_neg_integer(). 
%%%===================================================================== %%% API @@ -80,7 +81,10 @@ read(Bin, #loc{} = Loc) when is_binary(Bin) -> do_read(Bin, Loc) -> case bel_scan_read:bin(Bin) of {{new_ln, Incr}, Rest} -> - do_read(Rest, new_ln(incr_col(Incr, Loc))); + do_read(Rest, + set_ln(Loc#loc.ln+1, + set_col(Loc#loc.first_col, + incr_pos(Incr, Loc)))); {{continue, Incr}, Rest} -> do_read(Rest, incr_col(Incr, Loc)); terminate -> @@ -92,6 +96,10 @@ incr(#loc{ln = Ln, col = Col}, Loc) -> incr({Ln, Col}, #loc{first_ln = Ln} = Loc) -> incr_pos(Col - Loc#loc.first_col, set_col(Col - Loc#loc.first_col + Loc#loc.col, Loc)); +incr({Ln, Col}, #loc{ln = LocLn} = Loc) when Ln < LocLn -> + set_pos(Loc#loc.pos, + set_col(Col, + set_ln(Ln + Loc#loc.ln - Loc#loc.first_ln, Loc))); incr({Ln, Col}, #loc{} = Loc) -> set_pos(Loc#loc.pos, set_col(Col, From 480f57155965d2fdca839a44ed278223485645bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Thu, 25 Apr 2024 00:21:23 -0300 Subject: [PATCH 13/25] feat(html): add possibility to override attributes engine --- src/bel_scan.erl | 19 ++++++++--- src/bel_scan_eng_html5.erl | 66 +++++++++++++++++++++++--------------- 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 7fcfc71..3bfe376 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -33,6 +33,8 @@ , token/4 , push_token/2 , push_tokens/2 + , init_engines/1 + , lookup_engine/2 ]). % State getters and setters functions @@ -58,6 +60,8 @@ -import(bel_scan_loc, [ new_ln/1, incr_col/2 ]). -import(bel_scan_bpart, [ incr_len/2, get_part/1 ]). +-include("bel_scan_eng.hrl"). + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -endif. @@ -138,6 +142,12 @@ push_token(Token, #state{tokens = Tokens} = State) -> push_tokens(Tokens, State) when is_list(Tokens) -> lists:foldl(fun push_token/2, State, Tokens). +init_engines(Modules) -> + [init_engine(Mod) || Mod <- Modules]. 
+ +lookup_engine(Mod, #state{engines = Engines}) -> + proplists:lookup(Mod, Engines). + %%%===================================================================== %%% State getters and setters functions %%%===================================================================== @@ -176,13 +186,12 @@ set_tokens(Tokens, #state{} = State) -> %%% Internal functions %%%===================================================================== -init_engines(Modules) -> - [init_engine(Mod) || Mod <- Modules]. - init_engine(Mod) when is_atom(Mod) -> init_engine({Mod, ?DEFAULT_OPTS}); -init_engine({Mod, Opts}) when is_atom(Mod) -> - {Mod, bel_scan_eng:compile(Mod:init(Opts))}. +init_engine({Mod, Opts}) when is_atom(Mod), is_map(Opts) -> + {Mod, bel_scan_eng:compile(Mod:init(Opts))}; +init_engine({Mod, #engine{} = Eng}) when is_atom(Mod) -> + {Mod, Eng}. start(Bin0, State0) -> State = handle_start(Bin0, State0), diff --git a/src/bel_scan_eng_html5.erl b/src/bel_scan_eng_html5.erl index 17e9028..1fe140b 100644 --- a/src/bel_scan_eng_html5.erl +++ b/src/bel_scan_eng_html5.erl @@ -38,11 +38,15 @@ -define(CHILD_NODES, "((?:(?R)|(?:(?!<\\/?(?2)).*?))*)"). +-define(ATTRS_ENGINES, [bel_scan_eng_html5_attr]). + +-record(state, { attrs_engines :: [module()] }). + %%%===================================================================== %%% bel_scan_eng callbacks %%%===================================================================== -init(_Opts) -> +init(Opts) -> #engine{ markers = [ #marker{ @@ -71,55 +75,65 @@ init(_Opts) -> ?CLOSING_TAG >> } - ] + ], + state = #state{ + attrs_engines = bel_scan:init_engines( + maps:get(attrs_engines, Opts, ?ATTRS_ENGINES) + ) + } }. -handle_start(_Bin, State) -> - {noreply, State}. +handle_start(_Bin, Scan) -> + {noreply, Scan}. -handle_text(_Text, State) -> - {noreply, State}. +handle_text(_Text, Scan) -> + {noreply, Scan}. 
-handle_match({?MODULE, doctype, _Text, [], Loc}, State) -> +handle_match({?MODULE, doctype, _Text, [], Loc}, Scan) -> Token = bel_scan:token(doctype, <<"html">>, Loc), - {reply, [Token], State}; -handle_match({?MODULE, special_tag, _Text, Captured, Loc}, State) -> + {reply, [Token], Scan}; +handle_match({?MODULE, special_tag, _Text, Captured, Loc}, Scan) -> [OAngB, Tag, Attrs, _CAngB, Content, _CTag] = Captured, - Metadata = {attributes(Attrs, [OAngB, Tag], Loc), Content}, + Metadata = {attributes(Attrs, [OAngB, Tag], Loc, Scan), Content}, Token = bel_scan:token(special_tag, Tag, Metadata, Loc), - {reply, [Token], State}; -handle_match({?MODULE, void_tag, _Text, Captured, Loc}, State) -> + {reply, [Token], Scan}; +handle_match({?MODULE, void_tag, _Text, Captured, Loc}, Scan) -> [OAngB, Tag, Attrs, _CAngB] = Captured, - Metadata = attributes(Attrs, [OAngB, Tag], Loc), + Metadata = attributes(Attrs, [OAngB, Tag], Loc, Scan), Token = bel_scan:token(void_tag, Tag, Metadata, Loc), - {reply, [Token], State}; -handle_match({?MODULE, elem_tag, _Text, Captured, Loc}, State) -> + {reply, [Token], Scan}; +handle_match({?MODULE, elem_tag, _Text, Captured, Loc}, Scan) -> [OAngB, Tag, Attrs, CAngB, ChildNodes, _CTag] = Captured, Metadata = { - attributes(Attrs, [OAngB, Tag], Loc), - child_nodes(ChildNodes, [OAngB, Tag, Attrs, CAngB], Loc, State) + attributes(Attrs, [OAngB, Tag], Loc, Scan), + child_nodes(ChildNodes, [OAngB, Tag, Attrs, CAngB], Loc, Scan) }, Token = bel_scan:token(elem_tag, Tag, Metadata, Loc), - {reply, [Token], State}; -handle_match({Mod, _, _, _, _}, State) when Mod =/= ?MODULE -> - {noreply, State}. + {reply, [Token], Scan}; +handle_match({Mod, _, _, _, _}, Scan) when Mod =/= ?MODULE -> + {noreply, Scan}. -handle_terminate(_Tokens, State) -> - {noreply, State}. +handle_terminate(_Tokens, Scan) -> + {noreply, Scan}. 
%%%===================================================================== %%% Internal functions %%%===================================================================== -attributes(Bin, PrevParts, Loc) -> +state(Scan) -> + {?MODULE, Engine} = bel_scan:lookup_engine(?MODULE, Scan), + Engine#engine.state. + +attributes(Bin, PrevParts, Loc, Scan) -> + State = state(Scan), bel_scan:get_tokens(bel_scan:bin(Bin, #{ - engines => [bel_scan_eng_html5_attr], + engines => State#state.attrs_engines, loc => init_loc(PrevParts, Loc) })). -child_nodes(Bin, PrevParts, Loc, State) -> +child_nodes(Bin, PrevParts, Loc, Scan) -> bel_scan:get_tokens(bel_scan:bin(Bin, #{ - engines => bel_scan:get_engines(State), + engines => bel_scan:get_engines(Scan), loc => init_loc(PrevParts, Loc) })). From 6f9f5a9af73e2c22c93e77f077432c6bf2bc4f61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Thu, 25 Apr 2024 12:00:25 -0300 Subject: [PATCH 14/25] fix: failling check --- src/bel_scan.erl | 6 ++++-- src/bel_scan_anno.erl | 9 ++++----- src/bel_scan_bpart.erl | 2 +- src/bel_scan_eng.erl | 2 +- src/bel_scan_loc.erl | 2 +- src/bel_scan_mark.erl | 6 +++--- src/bel_scan_token.erl | 2 +- test/bel_scan_SUITE.erl | 12 ++++++------ 8 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 3bfe376..0f6e14b 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -69,7 +69,7 @@ -define(DEFAULT_OPTS, #{}). -define(DEFAULT_META, undefined). 
--record(state, { engines :: [engine()] +-record(state, { engines :: [{module(), engine()}] , bpart :: bpart() , loc :: loc() , prev_loc :: loc() @@ -200,6 +200,8 @@ start(Bin0, State0) -> continue(scan, <<>>, State) -> terminate(State); +continue(find_start_markers, <<>>, State) -> + terminate(State); continue(scan, <>, State) -> case bel_scan_read:bin(Rest0) of {{new_ln, Incr}, Rest} -> @@ -235,7 +237,7 @@ continue(find_start_markers, <>, State0) -> end; continue({handle_match, Match}, Rest, State0) -> State = handle_match(Match, State0), - continue(scan, Rest, State). + continue(find_start_markers, Rest, State). terminate(State0) -> State = handle_text(State0), diff --git a/src/bel_scan_anno.erl b/src/bel_scan_anno.erl index ea356fa..6f58ab5 100644 --- a/src/bel_scan_anno.erl +++ b/src/bel_scan_anno.erl @@ -1,14 +1,13 @@ -module(bel_scan_anno). --export_type([ t/0, loc/0, text/0, meta/0 ]). +-export_type([ t/0, loc/0, meta/0 ]). % -record(anno, { % init_loc :: loc(), % end_loc :: loc(), % meta :: term() % }). -% -opaque t() :: #anno{}. --type t() :: {loc(), loc(), text(), meta()}. --type loc() :: bel_scan_loc:t(). --type text() :: binary(). +% -type t() :: #anno{}. +-type t() :: {{loc(), loc()}, meta()}. +-type loc() :: {non_neg_integer(), non_neg_integer()}. -type meta() :: term(). diff --git a/src/bel_scan_bpart.erl b/src/bel_scan_bpart.erl index b06d4c2..0582e42 100644 --- a/src/bel_scan_bpart.erl +++ b/src/bel_scan_bpart.erl @@ -50,7 +50,7 @@ , len :: non_neg_integer() , init_len :: non_neg_integer() }). --opaque t() :: #bpart{}. +-type t() :: #bpart{}. %%%===================================================================== %%% API diff --git a/src/bel_scan_eng.erl b/src/bel_scan_eng.erl index 3f4a105..b0fe828 100644 --- a/src/bel_scan_eng.erl +++ b/src/bel_scan_eng.erl @@ -71,7 +71,7 @@ -include("bel_scan_eng.hrl"). --opaque t() :: #engine{}. +-type t() :: #engine{}. -type scan() :: bel_scan:t(). -type marker_id() :: bel_scan_mark:id(). 
-type token() :: bel_scan_token:t(). diff --git a/src/bel_scan_loc.erl b/src/bel_scan_loc.erl index a3ac7ce..435973a 100644 --- a/src/bel_scan_loc.erl +++ b/src/bel_scan_loc.erl @@ -54,7 +54,7 @@ -record(loc, { pos, ln, col, first_ln, first_col }). --opaque t() :: #loc{}. +-type t() :: #loc{}. -type pos() :: non_neg_integer(). %%%===================================================================== diff --git a/src/bel_scan_mark.erl b/src/bel_scan_mark.erl index 5862d96..84a1b60 100644 --- a/src/bel_scan_mark.erl +++ b/src/bel_scan_mark.erl @@ -18,9 +18,9 @@ -include("bel_scan_eng.hrl"). --opaque t() :: #marker{}. --type id() :: term(). --type re() :: binary() | {re_pattern, _, _, _, _}. % re:mp/0 isn't exported. +-type t() :: #marker{}. +-type id() :: atom(). +-type re() :: binary() | {re_pattern, _, _, _, _}. % re:mp/0 isn't exported. compile(#marker{} = Marker) -> case compile_re(Marker#marker.re) of diff --git a/src/bel_scan_token.erl b/src/bel_scan_token.erl index 338adb6..258aa53 100644 --- a/src/bel_scan_token.erl +++ b/src/bel_scan_token.erl @@ -18,7 +18,7 @@ value :: term() }). -% -opaque t() :: #token{}. +% -type t() :: #token{}. -type t() :: {id(), anno(), value()}. -type id() :: atom(). -type anno() :: bel_scan_anno:t(). 
diff --git a/test/bel_scan_SUITE.erl b/test/bel_scan_SUITE.erl index 809914c..f254ea4 100644 --- a/test/bel_scan_SUITE.erl +++ b/test/bel_scan_SUITE.erl @@ -140,9 +140,9 @@ bin(Config) when is_list(Config) -> SingleLnBin = <<"foo {{ {{A, b}, {0, \"C\"}} }} bar">>, [ - {text,{{1,1},{1,5},undefined},<<"foo ">>}, - {expr,{{1,5},{1,30},undefined},<<"{{A, b}, {0, \"C\"}}">>}, - {text,{{1,30},{1,34},undefined},<<" bar">>} + {text,{{{1,1},{1,5}},undefined},<<"foo ">>}, + {expr,{{{1,5},{1,29}},undefined},<<"{{A, b}, {0, \"C\"}}">>}, + {text,{{{1,29},{1,33}},undefined},<<" bar">>} ] = bel_scan:get_tokens(bel_scan:bin(SingleLnBin, Opts)), MultiLnBin = << @@ -151,9 +151,9 @@ bin(Config) when is_list(Config) -> bar" >>, [ - {text,{{1,1},{1,5},undefined},<<"foo ">>}, - {expr,{{1,5},{2,14},undefined},<<"{{A, b},\n {0, \"C\"}}">>}, - {text,{{2,14},{3,5},undefined},<<"\n bar">>} + {text,{{{1,1},{1,5}},undefined},<<"foo ">>}, + {expr,{{{1,5},{2,14}},undefined},<<"{{A, b},\n {0, \"C\"}}">>}, + {text,{{{2,14},{3,5}},undefined},<<"\n bar">>} ] = bel_scan:get_tokens(bel_scan:bin(MultiLnBin, Opts)), ok. 
From 947bdfb1247a8ca54b6f87cba44e145ac6b54542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Thu, 25 Apr 2024 13:19:59 -0300 Subject: [PATCH 15/25] refactor: cleanup --- src/bel_scan.erl | 27 +++++++++---- src/bel_scan_anno.erl | 13 ------ src/bel_scan_bpart.erl | 30 ++++++++++---- src/bel_scan_eng.erl | 71 +++++++++++++++++++++++++-------- src/bel_scan_eng_eel.erl | 25 +++++++++++- src/bel_scan_eng_html5.erl | 27 +++++++++++-- src/bel_scan_eng_html5_attr.erl | 25 +++++++++++- src/bel_scan_loc.erl | 25 +++++++++--- src/bel_scan_mark.erl | 47 ++++++++++++++++++++-- src/bel_scan_read.erl | 32 +++++++++++++++ src/bel_scan_token.erl | 50 ----------------------- 11 files changed, 261 insertions(+), 111 deletions(-) delete mode 100644 src/bel_scan_anno.erl delete mode 100644 src/bel_scan_token.erl diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 0f6e14b..23f9029 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -22,7 +22,7 @@ -module(bel_scan). -compile(inline_list_funcs). -% API +% API functions -export([ new/1 , bin/2 , state/1 @@ -55,6 +55,12 @@ , bpart/0 , loc/0 , token/0 + , token_id/0 + , token_anno/0 + , token_metadata/0 + , token_loc/0 + , token_value/0 + , pos/0 ]). -import(bel_scan_loc, [ new_ln/1, incr_col/2 ]). @@ -77,15 +83,20 @@ , init_pos :: pos() }). --opaque t() :: #state{}. --type engine() :: bel_scan_eng:t(). --type bpart() :: bel_scan_bpart:t(). --type loc() :: bel_scan_loc:t(). --type token() :: bel_scan_token:t(). --type pos() :: bel_scan_loc:pos(). +-opaque t() :: #state{}. +-type engine() :: bel_scan_eng:t(). +-type bpart() :: bel_scan_bpart:t(). +-type loc() :: bel_scan_loc:t(). +-type token() :: {token_id(), token_anno(), token_value()}. +-type token_id() :: atom(). +-type token_anno() :: {{token_loc(), token_loc()}, token_metadata()}. +-type token_metadata() :: term(). +-type token_loc() :: bel_scan_loc:pos_tuple(). +-type token_value() :: binary(). +-type pos() :: bel_scan_loc:pos(). 
%%%===================================================================== -%%% API +%%% API functions %%%===================================================================== new(Params) when is_map(Params) -> diff --git a/src/bel_scan_anno.erl b/src/bel_scan_anno.erl deleted file mode 100644 index 6f58ab5..0000000 --- a/src/bel_scan_anno.erl +++ /dev/null @@ -1,13 +0,0 @@ --module(bel_scan_anno). - --export_type([ t/0, loc/0, meta/0 ]). - -% -record(anno, { - % init_loc :: loc(), - % end_loc :: loc(), - % meta :: term() -% }). -% -type t() :: #anno{}. --type t() :: {{loc(), loc()}, meta()}. --type loc() :: {non_neg_integer(), non_neg_integer()}. --type meta() :: term(). diff --git a/src/bel_scan_bpart.erl b/src/bel_scan_bpart.erl index 0582e42..6777a22 100644 --- a/src/bel_scan_bpart.erl +++ b/src/bel_scan_bpart.erl @@ -21,12 +21,16 @@ %%%--------------------------------------------------------------------- -module(bel_scan_bpart). +% API functions -export([ new/1 , reset_pos/2 , incr_pos/2 , incr_len/2 , get_part/1 - , get_bin/1 + ]). + +% State getters and setters functions +-export([ get_bin/1 , set_bin/2 , get_pos/1 , set_pos/2 @@ -36,7 +40,7 @@ , set_init_len/2 ]). --export_type([ t/0 ]). +-export_type([ t/0, pos/0, len/0 ]). -define(FIRST_POS, 0). -define(INIT_LEN, 0). @@ -46,14 +50,16 @@ -endif. -record(bpart, { bin :: binary() - , pos :: non_neg_integer() - , len :: non_neg_integer() - , init_len :: non_neg_integer() + , pos :: pos() + , len :: len() + , init_len :: len() }). --type t() :: #bpart{}. +-type t() :: #bpart{}. +-type pos() :: non_neg_integer(). +-type len() :: non_neg_integer(). %%%===================================================================== -%%% API +%%% API functions %%%===================================================================== new(Params) when is_map(Params) -> @@ -80,6 +86,10 @@ incr_len(N, #bpart{len = Len} = BPart) -> get_part(#bpart{bin = Bin} = BPart) -> binary:part(Bin, BPart#bpart.pos, BPart#bpart.len). 
+%%%===================================================================== +%%% State getters and setters functions +%%%===================================================================== + get_bin(#bpart{bin = Bin}) -> Bin. @@ -104,6 +114,12 @@ get_init_len(#bpart{init_len = InitLen}) -> set_init_len(InitLen, #bpart{} = BPart) -> BPart#bpart{init_len = InitLen}. +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +% nothing here yet! + %%%===================================================================== %%% Tests %%%===================================================================== diff --git a/src/bel_scan_eng.erl b/src/bel_scan_eng.erl index b0fe828..6ed4308 100644 --- a/src/bel_scan_eng.erl +++ b/src/bel_scan_eng.erl @@ -1,8 +1,31 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc Engine behaviour module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- -module(bel_scan_eng). --export([ compile/1 - , fold/2 - , get_module/1 +% API functions +-export([ compile/1, fold/2 ]). 
+ +% State getters and setters functions +-export([ get_module/1 , set_module/2 , get_markers/1 , set_markers/2 @@ -21,7 +44,21 @@ , captured/0 ]). -% Callbacks +-include("bel_scan_eng.hrl"). + +-type t() :: #engine{}. +-type scan() :: bel_scan:t(). +-type marker_id() :: bel_scan_mark:id(). +-type token() :: bel_scan:token(). +-type loc() :: bel_scan_loc:t(). +-type opts() :: term(). +-type state() :: term(). +-type re_group() :: binary(). +-type captured() :: [re_group()]. + +%%%===================================================================== +%%% Callbacks +%%%===================================================================== -callback init(Opts) -> Engine when Opts :: opts() @@ -67,19 +104,9 @@ | {halt, scan()} . -% Libs - --include("bel_scan_eng.hrl"). - --type t() :: #engine{}. --type scan() :: bel_scan:t(). --type marker_id() :: bel_scan_mark:id(). --type token() :: bel_scan_token:t(). --type loc() :: bel_scan_loc:t(). --type opts() :: term(). --type state() :: term(). --type re_group() :: binary(). --type captured() :: [re_group()]. +%%%===================================================================== +%%% API functions +%%%===================================================================== compile(#engine{markers = Markers} = Eng) -> Eng#engine{markers = [bel_scan_mark:compile(M) || M <- Markers]}. @@ -87,6 +114,10 @@ compile(#engine{markers = Markers} = Eng) -> fold(#engine{} = Eng, Funs) when is_list(Funs) -> lists:foldl(fun(F, E) -> F(E) end, Eng, Funs). +%%%===================================================================== +%%% State getters and setters functions +%%%===================================================================== + get_module(#engine{module = Module}) -> Module. @@ -104,3 +135,9 @@ get_state(#engine{state = State}) -> set_state(State, #engine{} = Eng) -> Eng#engine{state = State}. 
+ +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +% nothing here yet! diff --git a/src/bel_scan_eng_eel.erl b/src/bel_scan_eng_eel.erl index 48a858e..d801013 100644 --- a/src/bel_scan_eng_eel.erl +++ b/src/bel_scan_eng_eel.erl @@ -1,7 +1,28 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc EEl engine module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- -module(bel_scan_eng_eel). -behaviour(bel_scan_eng). -% bel_scan_eng callbacks +% bel_scan_eng callback functions -export([ init/1 , handle_start/2 , handle_text/2 @@ -12,7 +33,7 @@ -include("bel_scan_eng.hrl"). 
%%%===================================================================== -%%% bel_scan_eng callbacks +%%% bel_scan_eng callback functions %%%===================================================================== init(_Opts) -> diff --git a/src/bel_scan_eng_html5.erl b/src/bel_scan_eng_html5.erl index 1fe140b..8b58b15 100644 --- a/src/bel_scan_eng_html5.erl +++ b/src/bel_scan_eng_html5.erl @@ -1,7 +1,28 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc HTML5 engine module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- -module(bel_scan_eng_html5). -behaviour(bel_scan_eng). -% bel_scan_eng callbacks +% bel_scan_eng callback functions -export([ init/1 , handle_start/2 , handle_text/2 @@ -43,7 +64,7 @@ -record(state, { attrs_engines :: [module()] }). 
%%%===================================================================== -%%% bel_scan_eng callbacks +%%% bel_scan_eng callback functions %%%===================================================================== init(Opts) -> @@ -220,7 +241,7 @@ scan_test() -> <<"AAA">>}}, <<"title">>}, {text,{{{1,198},{1,202}},undefined},<<" bar">>} - ], scan_(?SLINE))} + ], scan_(?SLINE))} , { "Should scan multiple lines" , ?assertEqual([ {text,{{{1,1},{2,1}},undefined},<<"\n">>}, diff --git a/src/bel_scan_eng_html5_attr.erl b/src/bel_scan_eng_html5_attr.erl index 6e61766..7960323 100644 --- a/src/bel_scan_eng_html5_attr.erl +++ b/src/bel_scan_eng_html5_attr.erl @@ -1,7 +1,28 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc HTML5 attributes engine module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- -module(bel_scan_eng_html5_attr). -behaviour(bel_scan_eng). -% bel_scan_eng callbacks +% bel_scan_eng callback functions -export([ init/1 , handle_start/2 , handle_text/2 @@ -12,7 +33,7 @@ -include("bel_scan_eng.hrl"). 
%%%===================================================================== -%%% bel_scan_eng callbacks +%%% bel_scan_eng callback functions %%%===================================================================== init(_Opts) -> diff --git a/src/bel_scan_loc.erl b/src/bel_scan_loc.erl index 435973a..24a803a 100644 --- a/src/bel_scan_loc.erl +++ b/src/bel_scan_loc.erl @@ -21,6 +21,7 @@ %%%--------------------------------------------------------------------- -module(bel_scan_loc). +% API functions -export([ new/1 , read/1 , read/2 @@ -30,7 +31,10 @@ , incr_col/2 , new_ln/1 , to_tuple/1 - , get_pos/1 + ]). + +% State getters and setters functions +-export([ get_pos/1 , set_pos/2 , get_ln/1 , set_ln/2 @@ -42,7 +46,7 @@ , set_first_col/2 ]). --export_type([ t/0, pos/0 ]). +-export_type([ t/0, pos/0, pos_tuple/0 ]). -define(FIRST_POS, 0). -define(FIRST_LN, 1). @@ -54,11 +58,12 @@ -record(loc, { pos, ln, col, first_ln, first_col }). --type t() :: #loc{}. --type pos() :: non_neg_integer(). +-type t() :: #loc{}. +-type pos() :: non_neg_integer(). +-type pos_tuple() :: {pos(), pos()}. %%%===================================================================== -%%% API +%%% API functions %%%===================================================================== new(Params) when is_map(Params) -> @@ -130,6 +135,10 @@ new_ln(#loc{ln = Ln, first_col = FirstCol, pos = Pos} = Loc) -> to_tuple(#loc{ln = Ln, col = Col}) -> {Ln, Col}. +%%%===================================================================== +%%% State getters and setters functions +%%%===================================================================== + get_pos(#loc{pos = Pos}) -> Pos. @@ -160,6 +169,12 @@ get_first_col(#loc{first_col = FirstCol}) -> set_first_col(FirstCol, #loc{} = Loc) -> Loc#loc{first_col = FirstCol}. +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +% nothing here yet! 
+ %%%===================================================================== %%% Tests %%%===================================================================== diff --git a/src/bel_scan_mark.erl b/src/bel_scan_mark.erl index 84a1b60..093e5ce 100644 --- a/src/bel_scan_mark.erl +++ b/src/bel_scan_mark.erl @@ -1,8 +1,31 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc Marker module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- -module(bel_scan_mark). --export([ compile/1 - , re_match/2 - , get_id/1 +% API functions +-export([ compile/1, re_match/2 ]). + +% State getters and setters functions +-export([ get_id/1 , set_id/2 , get_re/1 , set_re/2 @@ -20,7 +43,13 @@ -type t() :: #marker{}. -type id() :: atom(). --type re() :: binary() | {re_pattern, _, _, _, _}. % re:mp/0 isn't exported. +-type re() :: binary() + | {re_pattern, _, _, _, _} % re:mp/0 isn't exported. + . + +%%%===================================================================== +%%% API functions +%%%===================================================================== compile(#marker{} = Marker) -> case compile_re(Marker#marker.re) of @@ -44,6 +73,10 @@ re_match(#marker{re = RE}, Bin) -> nomatch end. 
+%%%===================================================================== +%%% State getters and setters functions +%%%===================================================================== + get_id(#marker{id = Id}) -> Id. @@ -55,3 +88,9 @@ get_re(#marker{re = RE}) -> set_re(RE, #marker{} = Marker) -> Marker#marker{re = RE}. + +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +% nothing here yet! diff --git a/src/bel_scan_read.erl b/src/bel_scan_read.erl index 346d9ba..c2093e0 100644 --- a/src/bel_scan_read.erl +++ b/src/bel_scan_read.erl @@ -1,11 +1,37 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc Reader module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- -module(bel_scan_read). +% API functions -export([ bin/1 ]). -define(is_lf(X), ( X =:= $\r orelse X =:= $\n orelse X =:= $\f )). 
+%%%===================================================================== +%%% API functions +%%%===================================================================== + bin(<<$\r, $\n, Rest/binary>>) -> {{new_ln, 2}, Rest}; bin(<>) when ?is_lf(Char) -> @@ -14,3 +40,9 @@ bin(<<_, Rest/binary>>) -> {{continue, 1}, Rest}; bin(<<>>) -> terminate. + +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +% nothing here yet! diff --git a/src/bel_scan_token.erl b/src/bel_scan_token.erl deleted file mode 100644 index 258aa53..0000000 --- a/src/bel_scan_token.erl +++ /dev/null @@ -1,50 +0,0 @@ -% TODO: Delete. --module(bel_scan_token). - --export([ new/1 - , get_id/1 - , set_id/2 - , get_anno/1 - , set_anno/2 - , get_value/1 - , set_value/2 - ]). - --export_type([ t/0, id/0, anno/0, value/0 ]). - --record(token, { - id :: id(), - anno :: anno(), - value :: term() -}). - -% -type t() :: #token{}. --type t() :: {id(), anno(), value()}. --type id() :: atom(). --type anno() :: bel_scan_anno:t(). --type value() :: term(). - -new(Params) when is_map(Params) -> - #token{ - id = maps:get(id, Params), - anno = maps:get(anno, Params), - value = maps:get(value, Params) - }. - -get_id(#token{id = Id}) -> - Id. - -set_id(Id, #token{} = Token) -> - Token#token{id = Id}. - -get_anno(#token{anno = Anno}) -> - Anno. - -set_anno(Anno, #token{} = Token) -> - Token#token{anno = Anno}. - -get_value(#token{value = Value}) -> - Value. - -set_value(Value, #token{} = Token) -> - Token#token{value = Value}. 
From 580b4dbee281f871d0efd6412d590fc20c031a89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Thu, 25 Apr 2024 22:35:26 -0300 Subject: [PATCH 16/25] fix: wrong location --- src/bel_scan.erl | 5 ++--- src/bel_scan_eng_html5.erl | 27 +++++++++++++-------------- src/bel_scan_loc.erl | 15 --------------- 3 files changed, 15 insertions(+), 32 deletions(-) diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 23f9029..77fd0ab 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -233,9 +233,8 @@ continue(find_start_markers, <>, State0) -> {match, {Mod, MarkerId, MatchText, Captured, Rest}} -> State1 = handle_text(State0), InitLoc = State1#state.loc, - MatchTextLoc = bel_scan_loc:read(MatchText), - EndLoc = bel_scan_loc:incr(MatchTextLoc, InitLoc), - Pos = bel_scan_loc:get_pos(MatchTextLoc) + bel_scan_loc:get_pos(InitLoc), + EndLoc = bel_scan_loc:read(MatchText, State1#state.loc), + Pos = bel_scan_loc:get_pos(EndLoc), BPart = bel_scan_bpart:reset_pos(Pos - State1#state.init_pos, State1#state.bpart), Match = {Mod, MarkerId, MatchText, Captured, {InitLoc, EndLoc}}, continue({handle_match, Match}, Rest, State1#state{ diff --git a/src/bel_scan_eng_html5.erl b/src/bel_scan_eng_html5.erl index 8b58b15..2165245 100644 --- a/src/bel_scan_eng_html5.erl +++ b/src/bel_scan_eng_html5.erl @@ -159,8 +159,7 @@ child_nodes(Bin, PrevParts, Loc, Scan) -> })). init_loc(PrevParts, {InitLoc, _EndLoc}) -> - BLoc = bel_scan_loc:read(iolist_to_binary(PrevParts)), - bel_scan_loc:incr(BLoc, InitLoc). + bel_scan_loc:read(iolist_to_binary(PrevParts), InitLoc). %%%===================================================================== %%% Tests @@ -246,28 +245,28 @@ scan_test() -> , ?assertEqual([ {text,{{{1,1},{2,1}},undefined},<<"\n">>}, {special_tag, - {{{2,1},{5,10}},{[],<<"\n\n
Foo
\n\n">>}}, + {{{2,1},{6,10}},{[],<<"\n\n
Foo
\n\n">>}}, <<"script">>}, - {text,{{{5,10},{6,1}},undefined},<<"\n">>}, + {text,{{{6,10},{7,1}},undefined},<<"\n">>}, {elem_tag, - {{{6,1},{6,34}}, + {{{7,1},{7,34}}, {[], - [{text,{{{6,7},{6,10}},undefined},<<" ">>}, - {elem_tag, - {{{6,10},{6,26}}, + [{text,{{{7,7},{7,10}},undefined},<<" ">>}, + {elem_tag, + {{{7,10},{7,26}}, {[], [{text, - {{{6,13},{6,22}},undefined}, + {{{7,13},{7,22}},undefined}, <<" ooooo ">>}]}}, - <<"i">>}, - {text,{{{6,26},{6,27}},undefined},<<" ">>}]}}, + <<"i">>}, + {text,{{{7,26},{7,27}},undefined},<<" ">>}]}}, <<"span">>}, - {text,{{{6,34},{9,1}},undefined},<<"ript>\n">>}, + {text,{{{7,34},{10,1}},undefined},<<"\nbar\n\n">>}, {special_tag, - {{{9,1},{13,21}}, + {{{10,1},{14,21}}, {[],<<"\n\n Title\n\n ">>}}, <<"title">>}, - {text,{{{13,21},{14,1}},undefined},<<"t">>} + {text,{{{14,21},{15,1}},undefined},<<"\n">>} ], scan_(?MLINE))} ]. diff --git a/src/bel_scan_loc.erl b/src/bel_scan_loc.erl index 24a803a..0b831f0 100644 --- a/src/bel_scan_loc.erl +++ b/src/bel_scan_loc.erl @@ -25,7 +25,6 @@ -export([ new/1 , read/1 , read/2 - , incr/2 , incr_pos/2 , incr_ln/2 , incr_col/2 @@ -96,20 +95,6 @@ do_read(Bin, Loc) -> Loc end. -incr(#loc{ln = Ln, col = Col}, Loc) -> - incr({Ln, Col}, Loc); -incr({Ln, Col}, #loc{first_ln = Ln} = Loc) -> - incr_pos(Col - Loc#loc.first_col, - set_col(Col - Loc#loc.first_col + Loc#loc.col, Loc)); -incr({Ln, Col}, #loc{ln = LocLn} = Loc) when Ln < LocLn -> - set_pos(Loc#loc.pos, - set_col(Col, - set_ln(Ln + Loc#loc.ln - Loc#loc.first_ln, Loc))); -incr({Ln, Col}, #loc{} = Loc) -> - set_pos(Loc#loc.pos, - set_col(Col, - set_ln(Ln, Loc))). - incr_pos(N, #loc{pos = Pos} = Loc) -> Loc#loc{pos = Pos+N}. 
From 06856e9172c82f9fe705ea71e0a496d02b028a6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Thu, 25 Apr 2024 22:39:19 -0300 Subject: [PATCH 17/25] fix(eel): wrong regex --- src/bel_scan_eng_eel.erl | 95 +++++++++++++++++++++++++++++++--------- 1 file changed, 74 insertions(+), 21 deletions(-) diff --git a/src/bel_scan_eng_eel.erl b/src/bel_scan_eng_eel.erl index d801013..fdf2cbb 100644 --- a/src/bel_scan_eng_eel.erl +++ b/src/bel_scan_eng_eel.erl @@ -32,6 +32,10 @@ -include("bel_scan_eng.hrl"). +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + %%%===================================================================== %%% bel_scan_eng callback functions %%%===================================================================== @@ -41,38 +45,23 @@ init(_Opts) -> markers = [ #marker{ id = inline, - re = << - "(<%=\\s+)(.*?[^\\s]*)(\\s+\\.%>)" "|" - "(<%=\\s+)(.*?[^\\s]*)(^(\\s*\\.%>))" - >> + re = <<"<%=\\s+((?:(?!<%).)*)\\s+\.%>">> }, #marker{ id = start, - re = << - "(<%=\\s+)(.*?[^\\s]*)(\\s+%>)" "|" - "(<%=\\s+)(.*?[^\\s]*)(^(\\s*%>))" - >> + re = <<"<%=\\s+((?:(?!<%).)*)\\s+%>">> }, #marker{ id = continue, - re = << - "(<%\\s+)(.*?[^\\s]*)(\\s+%>)" "|" - "(<%\\s+)(.*?[^\\s]*)(^(\\s*%>))" - >> + re = <<"<%\\s+((?:(?!<%).)*)\\s+%>">> }, #marker{ id = terminate, - re = << - "(<%\\s+)(.*?[^\\s]*)(\\s+\\.%>)" "|" - "(<%\\s+)(.*?[^\\s]*)(^(\\s*\\.%>))" - >> + re = <<"<%\\s+((?:(?!<%).)*)\\s+\.%>">> }, #marker{ id = comment, - re = << - "(<%!--\\s+)(.*?[^\\s]*)(\\s+--%>)" "|" - "(<%!--\\s+)(.*?[^\\s]*)(^(\\s*--%>))" - >> + re = <<"<%!--\s+((?:(?!<%).)*)\s+--%>">> } ] }. @@ -84,7 +73,7 @@ handle_text(_Text, State) -> {noreply, State}. 
handle_match({?MODULE, MarkerId, _Text, Captured, Loc}, State) -> - [_SMarker, Expr, _EMarker] = Captured, + [Expr] = Captured, Token = bel_scan:token(MarkerId, Expr, Loc), {reply, [Token], State}; handle_match({Mod, _, _, _, _}, State) when Mod =/= ?MODULE -> @@ -98,3 +87,67 @@ handle_terminate(_Tokens, State) -> %%%===================================================================== % nothing here yet! + +%%%===================================================================== +%%% Tests +%%%===================================================================== + +-ifdef(TEST). +-compile([export_all, nowarn_export_all]). + +% NOTE: Just a test, not intended to have a valid HTML syntax. + +-define(SLINE, << +"a <%= b .%> c <%= d %> e <% f %> g <% h .%> i" +>>). + +-define(MLINE, <<"a <%= b +.%> c <%= +d %> e +<% f + +%> + + g + +<% + +h + + .%> + +i + +">>). + +scan_(Bin) -> + bel_scan:get_tokens(bel_scan:bin(Bin, #{engines => [?MODULE]})). + +scan_test() -> + [ { "Should scan single line" + , ?assertEqual([ + {text,{{{1,1},{1,3}},undefined},<<"a ">>}, + {inline,{{{1,3},{1,12}},undefined},<<"b">>}, + {text,{{{1,12},{1,15}},undefined},<<" c ">>}, + {start,{{{1,15},{1,23}},undefined},<<"d">>}, + {text,{{{1,23},{1,26}},undefined},<<" e ">>}, + {continue,{{{1,26},{1,33}},undefined},<<"f">>}, + {text,{{{1,33},{1,36}},undefined},<<" g ">>}, + {terminate,{{{1,36},{1,44}},undefined},<<"h">>}, + {text,{{{1,44},{1,46}},undefined},<<" i">>} + ], scan_(?SLINE))} + , { "Should scan multiple lines" + , ?assertEqual([ + {text,{{{1,1},{1,3}},undefined},<<"a ">>}, + {inline,{{{1,3},{2,4}},undefined},<<"b">>}, + {text,{{{2,4},{2,7}},undefined},<<" c ">>}, + {start,{{{2,7},{3,5}},undefined},<<"d">>}, + {text,{{{3,5},{4,1}},undefined},<<" e\n">>}, + {continue,{{{4,1},{6,3}},undefined},<<"f">>}, + {text,{{{6,3},{10,1}},undefined},<<"\n\n g\n\n">>}, + {terminate,{{{10,1},{14,13}},undefined},<<"h">>}, + {text,{{{14,13},{18,1}},undefined},<<"\n\ni\n\n">>} + ], scan_(?MLINE))} + ]. 
+ +-endif. From 2f867aa835fbf360886782262208221b6f0d2175 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Thu, 25 Apr 2024 23:09:57 -0300 Subject: [PATCH 18/25] fix(example): wrong regex --- test/bel_scan_SUITE.erl | 25 +++++++++++++++++-------- test/support/support_scan_eng.erl | 16 +++++++++++----- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/test/bel_scan_SUITE.erl b/test/bel_scan_SUITE.erl index f254ea4..a4dd0d3 100644 --- a/test/bel_scan_SUITE.erl +++ b/test/bel_scan_SUITE.erl @@ -145,15 +145,24 @@ bin(Config) when is_list(Config) -> {text,{{{1,29},{1,33}},undefined},<<" bar">>} ] = bel_scan:get_tokens(bel_scan:bin(SingleLnBin, Opts)), - MultiLnBin = << -"foo {{ {{A, b}, - {0, \"C\"}} }} - bar" - >>, + MultiLnBin = <<"foo + {{ {{A, b}, + {0, \"C\"}} }} + bar + +{{ {{ {{ d }} }} }} {{ a + +}} +">>, [ - {text,{{{1,1},{1,5}},undefined},<<"foo ">>}, - {expr,{{{1,5},{2,14}},undefined},<<"{{A, b},\n {0, \"C\"}}">>}, - {text,{{{2,14},{3,5}},undefined},<<"\n bar">>} + {text,{{{1,1},{2,5}},undefined},<<"foo\n ">>}, + {expr,{{{2,5},{3,21}},undefined}, + <<"{{A, b},\n {0, \"C\"}}">>}, + {text,{{{3,21},{6,1}},undefined},<<"\n bar\n\n">>}, + {expr,{{{6,1},{6,20}},undefined},<<"{{ {{ d }} }}">>}, + {text,{{{6,20},{6,22}},undefined},<<" ">>}, + {expr,{{{6,22},{8,3}},undefined},<<"a">>}, + {text,{{{8,3},{9,1}},undefined},<<"\n">>} ] = bel_scan:get_tokens(bel_scan:bin(MultiLnBin, Opts)), ok. 
diff --git a/test/support/support_scan_eng.erl b/test/support/support_scan_eng.erl index 923a322..ee5100a 100644 --- a/test/support/support_scan_eng.erl +++ b/test/support/support_scan_eng.erl @@ -42,8 +42,10 @@ init(_Opts) -> #marker{ id = expr, re = << - "({{\\s*)(.*[^}]+[^{]+[^\\s])(\\s*}})" "|" - "({{\\s*)(.*[^\\s])(\\s*}})" + % nested case {{ {{ foo}} }} + "{{\\s*(.*?[^}]+[^{]+[^\\s])\\s*}}" "|" + % simple case {{ foo }} + "{{\\s*(.*?[^\\s])\\s*}}" >> } ] @@ -55,11 +57,15 @@ handle_start(_Bin, State) -> handle_text(_Text, State) -> {noreply, State}. -handle_match({?MODULE, expr, _Text, Captured, Loc}, State) -> - [_SMarker, Expr, _EMarker] = Captured, +% nested case +handle_match({?MODULE, expr, _Text, [Expr], Loc}, State) -> Token = bel_scan:token(expr, Expr, Loc), {reply, [Token], State}; -handle_match(_Match, State) -> +% simple case +handle_match({?MODULE, expr, _Text, [<<>>, Expr], Loc}, State) -> + Token = bel_scan:token(expr, Expr, Loc), + {reply, [Token], State}; +handle_match({Mod, _, _, _, _}, State) when Mod =/= ?MODULE -> {noreply, State}. handle_terminate(_Tokens, State) -> From 4a2b7e82cb389396276b65d111a052cfa0223f02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Fri, 26 Apr 2024 10:01:27 -0300 Subject: [PATCH 19/25] refactor: organize code --- src/bel_scan.erl | 32 ++++++++++---------------------- src/bel_scan_eng_html5_attr.erl | 14 ++++++++++---- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 77fd0ab..0dac511 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -68,10 +68,6 @@ -include("bel_scan_eng.hrl"). --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). --endif. - -define(DEFAULT_OPTS, #{}). -define(DEFAULT_META, undefined). @@ -138,15 +134,6 @@ token(Id, Value, Loc) -> token(Id, Value, Metadata, Loc) -> {Id, anno(Loc, Metadata), Value}. 
-anno({InitLoc0, EndLoc0}, Metadata) -> - InitLoc = bel_scan_loc:to_tuple(InitLoc0), - EndLoc = bel_scan_loc:to_tuple(EndLoc0), - {{InitLoc, EndLoc}, Metadata}. - -clear_text(#state{bpart = BPart} = State) -> - Pos = bel_scan_loc:get_pos(State#state.loc), - State#state{bpart = bel_scan_bpart:reset_pos(Pos - State#state.init_pos, BPart)}. - push_token(Token, #state{tokens = Tokens} = State) -> State#state{tokens = Tokens ++ [Token]}. @@ -235,7 +222,7 @@ continue(find_start_markers, <>, State0) -> InitLoc = State1#state.loc, EndLoc = bel_scan_loc:read(MatchText, State1#state.loc), Pos = bel_scan_loc:get_pos(EndLoc), - BPart = bel_scan_bpart:reset_pos(Pos - State1#state.init_pos, State1#state.bpart), + BPart = reset_bpart_pos(Pos, State1), Match = {Mod, MarkerId, MatchText, Captured, {InitLoc, EndLoc}}, continue({handle_match, Match}, Rest, State1#state{ loc = EndLoc, @@ -344,13 +331,14 @@ do_handle_terminate([{Mod, _Eng} | Engs], Tokens0, State0) -> do_handle_terminate([], Tokens, State) -> State#state{tokens = Tokens}. -%%%===================================================================== -%%% Tests -%%%===================================================================== - --ifdef(TEST). --compile([export_all, nowarn_export_all]). +anno({InitLoc0, EndLoc0}, Metadata) -> + InitLoc = bel_scan_loc:to_tuple(InitLoc0), + EndLoc = bel_scan_loc:to_tuple(EndLoc0), + {{InitLoc, EndLoc}, Metadata}. -% TODO +clear_text(#state{loc = Loc} = State) -> + Pos = bel_scan_loc:get_pos(Loc), + State#state{bpart = reset_bpart_pos(Pos, State)}. --endif. +reset_bpart_pos(Pos, #state{init_pos = InitPos, bpart = BPart}) -> + bel_scan_bpart:reset_pos(Pos - InitPos, BPart). 
diff --git a/src/bel_scan_eng_html5_attr.erl b/src/bel_scan_eng_html5_attr.erl index 7960323..52d0f92 100644 --- a/src/bel_scan_eng_html5_attr.erl +++ b/src/bel_scan_eng_html5_attr.erl @@ -42,8 +42,11 @@ init(_Opts) -> #marker{ id = attribute, re = << + % case double quote "(\\w+)=\\\"(.*?[^\\\\\"])\\\"" "|" + % case single quote "(\\w+)='(.*?[^\\\\'])'" "|" + % case attribute "(\\w+)" >> } @@ -56,14 +59,17 @@ handle_start(_Bin, State) -> handle_text(_Text, State) -> {halt, State}. -handle_match({?MODULE, attribute, Text, [<<>>, <<>>, <<>>, <<>>, K], Loc}, State) -> - Token = bel_scan:token(attribute, Text, K, Loc), +% case double quote +handle_match({?MODULE, attribute, Text, [K, V], Loc}, State) -> + Token = bel_scan:token(attribute, Text, {K, V}, Loc), {reply, [Token], State}; +% case single quote handle_match({?MODULE, attribute, Text, [<<>>, <<>>, K, V], Loc}, State) -> Token = bel_scan:token(attribute, Text, {K, V}, Loc), {reply, [Token], State}; -handle_match({?MODULE, attribute, Text, [K, V], Loc}, State) -> - Token = bel_scan:token(attribute, Text, {K, V}, Loc), +% case attribute +handle_match({?MODULE, attribute, Text, [<<>>, <<>>, <<>>, <<>>, K], Loc}, State) -> + Token = bel_scan:token(attribute, Text, K, Loc), {reply, [Token], State}; handle_match({Mod, _, _, _, _}, State) when Mod =/= ?MODULE -> {noreply, State}. 
From c61e35096470635c128cc4ba0f1770998abc1ee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Fri, 26 Apr 2024 18:11:53 -0300 Subject: [PATCH 20/25] feat: anno module --- src/bel_scan.erl | 59 ++++-- src/bel_scan_anno.erl | 97 ++++++++++ src/bel_scan_eng.erl | 8 +- src/bel_scan_eng_eel.erl | 108 +++++++++-- src/bel_scan_eng_html5.erl | 305 +++++++++++++++++++++--------- src/bel_scan_eng_html5_attr.erl | 14 +- test/bel_scan_SUITE.erl | 54 ++++-- test/support/support_scan_eng.erl | 20 +- 8 files changed, 509 insertions(+), 156 deletions(-) create mode 100644 src/bel_scan_anno.erl diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 0dac511..26c7798 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -29,8 +29,9 @@ , state/2 , fold/2 , text_token/2 + , text_token/3 + , token/2 , token/3 - , token/4 , push_token/2 , push_tokens/2 , init_engines/1 @@ -38,7 +39,9 @@ ]). % State getters and setters functions --export([ get_engines/1 +-export([ get_src/1 + , set_src/2 + , get_engines/1 , set_engines/2 , get_bpart/1 , set_bpart/2 @@ -51,6 +54,7 @@ ]). -export_type([ t/0 + , src/0 , engine/0 , bpart/0 , loc/0 @@ -71,7 +75,8 @@ -define(DEFAULT_OPTS, #{}). -define(DEFAULT_META, undefined). --record(state, { engines :: [{module(), engine()}] +-record(state, { src :: src() + , engines :: [{module(), engine()}] , bpart :: bpart() , loc :: loc() , prev_loc :: loc() @@ -80,6 +85,7 @@ }). -opaque t() :: #state{}. +-type src() :: bel_scan_anno:src(). -type engine() :: bel_scan_eng:t(). -type bpart() :: bel_scan_bpart:t(). -type loc() :: bel_scan_loc:t(). 
@@ -98,6 +104,7 @@ new(Params) when is_map(Params) -> Loc = maps:get(loc, Params, bel_scan_loc:new(#{})), #state{ + src = maps:get(src, Params, string), engines = init_engines(maps:get(engines, Params)), bpart = maps:get(bpart, Params, bel_scan_bpart:new(#{ bin => maps:get(bin, Params, <<>>) @@ -122,17 +129,24 @@ state(Bin, #state{bpart = BPart} = State) when is_binary(Bin) -> fold(#state{} = State, Funs) -> lists:foldl(fun(F, S) -> F(S) end, State, Funs). -text_token(Text, #state{} = State) -> - Loc = {State#state.prev_loc, State#state.loc}, - token(text, Text, ?DEFAULT_META, Loc); -text_token(Text, Loc) -> - token(text, Text, ?DEFAULT_META, Loc). +text_token(Text, State) -> + text_token(Text, ?DEFAULT_META, State). -token(Id, Value, Loc) -> - token(Id, Value, ?DEFAULT_META, Loc). +text_token(Text, Metadata, #state{} = State) -> + Anno = bel_scan_anno:new(#{ + src => State#state.src, + loc => State#state.prev_loc, + end_loc => State#state.loc, + text => Text + }), + token(text, Anno, Metadata). -token(Id, Value, Metadata, Loc) -> - {Id, anno(Loc, Metadata), Value}. +token(Id, Anno) -> + {Id, Anno, ?DEFAULT_META}. + +token(Id, Anno, Metadata) when is_atom(Id) -> + true = bel_scan_anno:is_anno(Anno), + {Id, Anno, Metadata}. push_token(Token, #state{tokens = Tokens} = State) -> State#state{tokens = Tokens ++ [Token]}. @@ -150,6 +164,12 @@ lookup_engine(Mod, #state{engines = Engines}) -> %%% State getters and setters functions %%%===================================================================== +get_src(#state{src = Src}) -> + Src. + +set_src(Src, #state{} = State) -> + State#state{src = Src}. + get_engines(#state{engines = Engines}) -> Engines. 
@@ -219,11 +239,17 @@ continue(find_start_markers, <>, State0) -> case find_marker(State0#state.engines, Rest0) of {match, {Mod, MarkerId, MatchText, Captured, Rest}} -> State1 = handle_text(State0), - InitLoc = State1#state.loc, + Loc = State1#state.loc, EndLoc = bel_scan_loc:read(MatchText, State1#state.loc), + Anno = bel_scan_anno:new(#{ + src => State1#state.src, + loc => Loc, + end_loc => EndLoc, + text => MatchText + }), + Match = {Mod, MarkerId, Captured, Anno}, Pos = bel_scan_loc:get_pos(EndLoc), BPart = reset_bpart_pos(Pos, State1), - Match = {Mod, MarkerId, MatchText, Captured, {InitLoc, EndLoc}}, continue({handle_match, Match}, Rest, State1#state{ loc = EndLoc, prev_loc = EndLoc, @@ -331,11 +357,6 @@ do_handle_terminate([{Mod, _Eng} | Engs], Tokens0, State0) -> do_handle_terminate([], Tokens, State) -> State#state{tokens = Tokens}. -anno({InitLoc0, EndLoc0}, Metadata) -> - InitLoc = bel_scan_loc:to_tuple(InitLoc0), - EndLoc = bel_scan_loc:to_tuple(EndLoc0), - {{InitLoc, EndLoc}, Metadata}. - clear_text(#state{loc = Loc} = State) -> Pos = bel_scan_loc:get_pos(Loc), State#state{bpart = reset_bpart_pos(Pos, State)}. diff --git a/src/bel_scan_anno.erl b/src/bel_scan_anno.erl new file mode 100644 index 0000000..f057faf --- /dev/null +++ b/src/bel_scan_anno.erl @@ -0,0 +1,97 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc Annotation module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_anno). + +% API functions +-export([ new/1, is_anno/1 ]). + +% State getters and setters functions +-export([ get_src/1 + , set_src/2 + , get_loc/1 + , set_loc/2 + , get_end_loc/1 + , set_end_loc/2 + , get_text/1 + , set_text/2 + ]). + +-export_type([ t/0, src/0, ln/0, col/0, loc/0, text/0 ]). + +-record(anno, { src :: src() + , loc :: loc() + , end_loc :: loc() + , text :: text() + }). + +-opaque t() :: #anno{}. +-type src() :: {file, file:filename_all()} + | {module, module()} + | string + . +-type ln() :: pos_integer(). +-type col() :: pos_integer(). +-type loc() :: bel_scan_loc:t(). +-type text() :: binary(). + +%%%===================================================================== +%%% API functions +%%%===================================================================== + +new(Params) when is_map(Params) -> + #anno{ + src = maps:get(src, Params), + loc = maps:get(loc, Params), + end_loc = maps:get(end_loc, Params), + text = maps:get(text, Params) + }. + +is_anno(X) -> + is_record(X, anno). + +%%%===================================================================== +%%% State getters and setters functions +%%%===================================================================== + +get_src(#anno{src = Src}) -> + Src. + +set_src(Src, #anno{} = Anno) -> + Anno#anno{src = Src}. + +get_loc(#anno{loc = Loc}) -> + Loc. + +set_loc(Loc, #anno{} = Anno) -> + Anno#anno{loc = Loc}. + +get_end_loc(#anno{end_loc = EndLoc}) -> + EndLoc. + +set_end_loc(EndLoc, #anno{} = Anno) -> + Anno#anno{end_loc = EndLoc}. + +get_text(#anno{text = Text}) -> + Text. + +set_text(Text, #anno{} = Anno) -> + Anno#anno{text = Text}. 
diff --git a/src/bel_scan_eng.erl b/src/bel_scan_eng.erl index 6ed4308..1fc9cd1 100644 --- a/src/bel_scan_eng.erl +++ b/src/bel_scan_eng.erl @@ -55,6 +55,7 @@ -type state() :: term(). -type re_group() :: binary(). -type captured() :: [re_group()]. +-type anno() :: bel_scan_anno:t(). %%%===================================================================== %%% Callbacks @@ -82,14 +83,11 @@ . -callback handle_match(Match, Scan) -> Return - when Match :: {MarkerMod, MarkerId, Text, Captured, Loc} + when Match :: {MarkerMod, MarkerId, Captured, Anno} , MarkerMod :: module() , MarkerId :: marker_id() - , Text :: binary() , Captured :: captured() - , Loc :: {InitLoc, EndLoc} - , InitLoc :: loc() - , EndLoc :: loc() + , Anno :: anno() , Scan :: scan() , Return :: {noreply, scan()} | {reply, [token()], scan()} diff --git a/src/bel_scan_eng_eel.erl b/src/bel_scan_eng_eel.erl index fdf2cbb..4442203 100644 --- a/src/bel_scan_eng_eel.erl +++ b/src/bel_scan_eng_eel.erl @@ -72,11 +72,11 @@ handle_start(_Bin, State) -> handle_text(_Text, State) -> {noreply, State}. -handle_match({?MODULE, MarkerId, _Text, Captured, Loc}, State) -> +handle_match({?MODULE, MarkerId, Captured, Anno}, State) -> [Expr] = Captured, - Token = bel_scan:token(MarkerId, Expr, Loc), + Token = bel_scan:token(MarkerId, Anno, Expr), {reply, [Token], State}; -handle_match({Mod, _, _, _, _}, State) when Mod =/= ?MODULE -> +handle_match({Mod, _, _, _}, State) when Mod =/= ?MODULE -> {noreply, State}. 
handle_terminate(_Tokens, State) -> @@ -126,27 +126,93 @@ scan_(Bin) -> scan_test() -> [ { "Should scan single line" , ?assertEqual([ - {text,{{{1,1},{1,3}},undefined},<<"a ">>}, - {inline,{{{1,3},{1,12}},undefined},<<"b">>}, - {text,{{{1,12},{1,15}},undefined},<<" c ">>}, - {start,{{{1,15},{1,23}},undefined},<<"d">>}, - {text,{{{1,23},{1,26}},undefined},<<" e ">>}, - {continue,{{{1,26},{1,33}},undefined},<<"f">>}, - {text,{{{1,33},{1,36}},undefined},<<" g ">>}, - {terminate,{{{1,36},{1,44}},undefined},<<"h">>}, - {text,{{{1,44},{1,46}},undefined},<<" i">>} + {text,{anno,string,{loc,0,1,1,1,1},{loc,2,1,3,1,1},<<"a ">>}, + undefined}, + {inline,{anno,string, + {loc,2,1,3,1,1}, + {loc,11,1,12,1,1}, + <<"<%= b .%>">>}, + <<"b">>}, + {text,{anno,string, + {loc,11,1,12,1,1}, + {loc,14,1,15,1,1}, + <<" c ">>}, + undefined}, + {start,{anno,string, + {loc,14,1,15,1,1}, + {loc,22,1,23,1,1}, + <<"<%= d %>">>}, + <<"d">>}, + {text,{anno,string, + {loc,22,1,23,1,1}, + {loc,25,1,26,1,1}, + <<" e ">>}, + undefined}, + {continue,{anno,string, + {loc,25,1,26,1,1}, + {loc,32,1,33,1,1}, + <<"<% f %>">>}, + <<"f">>}, + {text,{anno,string, + {loc,32,1,33,1,1}, + {loc,35,1,36,1,1}, + <<" g ">>}, + undefined}, + {terminate,{anno,string, + {loc,35,1,36,1,1}, + {loc,43,1,44,1,1}, + <<"<% h .%>">>}, + <<"h">>}, + {text,{anno,string, + {loc,43,1,44,1,1}, + {loc,45,1,46,1,1}, + <<" i">>}, + undefined} ], scan_(?SLINE))} , { "Should scan multiple lines" , ?assertEqual([ - {text,{{{1,1},{1,3}},undefined},<<"a ">>}, - {inline,{{{1,3},{2,4}},undefined},<<"b">>}, - {text,{{{2,4},{2,7}},undefined},<<" c ">>}, - {start,{{{2,7},{3,5}},undefined},<<"d">>}, - {text,{{{3,5},{4,1}},undefined},<<" e\n">>}, - {continue,{{{4,1},{6,3}},undefined},<<"f">>}, - {text,{{{6,3},{10,1}},undefined},<<"\n\n g\n\n">>}, - {terminate,{{{10,1},{14,13}},undefined},<<"h">>}, - {text,{{{14,13},{18,1}},undefined},<<"\n\ni\n\n">>} + {text,{anno,string,{loc,0,1,1,1,1},{loc,2,1,3,1,1},<<"a ">>}, + undefined}, + 
{inline,{anno,string, + {loc,2,1,3,1,1}, + {loc,11,2,4,1,1}, + <<"<%= b\n.%>">>}, + <<"b">>}, + {text,{anno,string, + {loc,11,2,4,1,1}, + {loc,14,2,7,1,1}, + <<" c ">>}, + undefined}, + {start,{anno,string, + {loc,14,2,7,1,1}, + {loc,22,3,5,1,1}, + <<"<%=\nd %>">>}, + <<"d">>}, + {text,{anno,string, + {loc,22,3,5,1,1}, + {loc,25,4,1,1,1}, + <<" e\n">>}, + undefined}, + {continue,{anno,string, + {loc,25,4,1,1,1}, + {loc,33,6,3,1,1}, + <<"<% f\n\n%>">>}, + <<"f">>}, + {text,{anno,string, + {loc,33,6,3,1,1}, + {loc,41,10,1,1,1}, + <<"\n\n g\n\n">>}, + undefined}, + {terminate,{anno,string, + {loc,41,10,1,1,1}, + {loc,60,14,13,1,1}, + <<"<%\n\nh\n\n .%>">>}, + <<"h">>}, + {text,{anno,string, + {loc,60,14,13,1,1}, + {loc,65,18,1,1,1}, + <<"\n\ni\n\n">>}, + undefined} ], scan_(?MLINE))} ]. diff --git a/src/bel_scan_eng_html5.erl b/src/bel_scan_eng_html5.erl index 2165245..6bbdcea 100644 --- a/src/bel_scan_eng_html5.erl +++ b/src/bel_scan_eng_html5.erl @@ -110,28 +110,34 @@ handle_start(_Bin, Scan) -> handle_text(_Text, Scan) -> {noreply, Scan}. 
-handle_match({?MODULE, doctype, _Text, [], Loc}, Scan) -> - Token = bel_scan:token(doctype, <<"html">>, Loc), +handle_match({?MODULE, doctype, [], Anno}, Scan) -> + Token = bel_scan:token(doctype, Anno, <<"html">>), {reply, [Token], Scan}; -handle_match({?MODULE, special_tag, _Text, Captured, Loc}, Scan) -> +handle_match({?MODULE, special_tag, Captured, Anno}, Scan) -> [OAngB, Tag, Attrs, _CAngB, Content, _CTag] = Captured, - Metadata = {attributes(Attrs, [OAngB, Tag], Loc, Scan), Content}, - Token = bel_scan:token(special_tag, Tag, Metadata, Loc), + Metadata = { + attributes(Attrs, [OAngB, Tag], Anno, Scan), + Content + }, + Token = bel_scan:token(binary_to_atom(Tag), Anno, Metadata), {reply, [Token], Scan}; -handle_match({?MODULE, void_tag, _Text, Captured, Loc}, Scan) -> +handle_match({?MODULE, void_tag, Captured, Anno}, Scan) -> [OAngB, Tag, Attrs, _CAngB] = Captured, - Metadata = attributes(Attrs, [OAngB, Tag], Loc, Scan), - Token = bel_scan:token(void_tag, Tag, Metadata, Loc), + Metadata = { + attributes(Attrs, [OAngB, Tag], Anno, Scan), + [] + }, + Token = bel_scan:token(binary_to_atom(Tag), Anno, Metadata), {reply, [Token], Scan}; -handle_match({?MODULE, elem_tag, _Text, Captured, Loc}, Scan) -> +handle_match({?MODULE, elem_tag, Captured, Anno}, Scan) -> [OAngB, Tag, Attrs, CAngB, ChildNodes, _CTag] = Captured, Metadata = { - attributes(Attrs, [OAngB, Tag], Loc, Scan), - child_nodes(ChildNodes, [OAngB, Tag, Attrs, CAngB], Loc, Scan) + attributes(Attrs, [OAngB, Tag], Anno, Scan), + child_nodes(ChildNodes, [OAngB, Tag, Attrs, CAngB], Anno, Scan) }, - Token = bel_scan:token(elem_tag, Tag, Metadata, Loc), + Token = bel_scan:token(binary_to_atom(Tag), Anno, Metadata), {reply, [Token], Scan}; -handle_match({Mod, _, _, _, _}, Scan) when Mod =/= ?MODULE -> +handle_match({Mod, _, _, _}, Scan) when Mod =/= ?MODULE -> {noreply, Scan}. 
handle_terminate(_Tokens, Scan) -> @@ -145,21 +151,22 @@ state(Scan) -> {?MODULE, Engine} = bel_scan:lookup_engine(?MODULE, Scan), Engine#engine.state. -attributes(Bin, PrevParts, Loc, Scan) -> +attributes(Bin, PrevParts, Anno, Scan) -> State = state(Scan), bel_scan:get_tokens(bel_scan:bin(Bin, #{ engines => State#state.attrs_engines, - loc => init_loc(PrevParts, Loc) + loc => init_loc(PrevParts, Anno) })). -child_nodes(Bin, PrevParts, Loc, Scan) -> +child_nodes(Bin, PrevParts, Anno, Scan) -> bel_scan:get_tokens(bel_scan:bin(Bin, #{ engines => bel_scan:get_engines(Scan), - loc => init_loc(PrevParts, Loc) + loc => init_loc(PrevParts, Anno) })). -init_loc(PrevParts, {InitLoc, _EndLoc}) -> - bel_scan_loc:read(iolist_to_binary(PrevParts), InitLoc). +init_loc(PrevParts, Anno) -> + Loc = bel_scan_anno:get_loc(Anno), + bel_scan_loc:read(iolist_to_binary(PrevParts), Loc). %%%===================================================================== %%% Tests @@ -196,77 +203,205 @@ scan_(Bin) -> scan_test() -> [ { "Should scan single line" , ?assertEqual([ - {text,{{{1,1},{1,6}},undefined},<<" ">>}, - {doctype,{{{1,6},{1,21}},undefined},<<"html">>}, - {text,{{{1,21},{1,30}},undefined},<<" Lalala ">>}, - {void_tag, - {{{1,30},{1,47}}, - [{attribute,{{{1,37},{1,43}},<<"hidden">>},<<"hidden">>}]}, - <<"area">>}, - {text,{{{1,47},{1,48}},undefined},<<" ">>}, - {void_tag,{{{1,48},{1,57}},[]},<<"input">>}, - {text,{{{1,57},{1,58}},undefined},<<" ">>}, - {elem_tag, - {{{1,58},{1,129}}, - {[{attribute, - {{{1,63},{1,71}},{<<"id">>,<<"foo">>}}, - <<"id=\"foo\"">>}, - {attribute, - {{{1,72},{1,87}},{<<"title">>,<<"b\\'a\\'r">>}}, - <<"title='b\\'a\\'r'">>}, - {attribute,{{{1,91},{1,97}},<<"hidden">>},<<"hidden">>}], - [{text,{{{1,99},{1,100}},undefined},<<" ">>}, - {elem_tag, - {{{1,100},{1,122}}, - {[], - [{elem_tag, - {{{1,105},{1,115}}, - {[],[{text,{{{1,108},{1,111}},undefined},<<"vvv">>}]}}, - <<"b">>}, - {text,{{{1,115},{1,116}},undefined},<<" ">>}]}}, - <<"div">>}, - 
{text,{{{1,122},{1,123}},undefined},<<" ">>}]}}, - <<"div">>}, - {text,{{{1,129},{1,140}},undefined},<<" Some test ">>}, - {special_tag, - {{{1,140},{1,198}}, - {[{attribute,{{{1,147},{1,153}},<<"hidden">>},<<"hidden">>}, - {attribute, - {{{1,158},{1,166}},{<<"id">>,<<"foo">>}}, - <<"id='foo'">>}, - {attribute, - {{{1,168},{1,176}},<<"required">>}, - <<"required">>}], - <<"AAA">>}}, - <<"title">>}, - {text,{{{1,198},{1,202}},undefined},<<" bar">>} + {text, + {anno,string,{loc,0,1,1,1,1},{loc,5,1,6,1,1},<<" ">>}, + undefined}, + {doctype, + {anno,string, + {loc,5,1,6,1,1}, + {loc,20,1,21,1,1}, + <<"">>}, + <<"html">>}, + {text, + {anno,string, + {loc,20,1,21,1,1}, + {loc,29,1,30,1,1}, + <<" Lalala ">>}, + undefined}, + {area, + {anno,string, + {loc,29,1,30,1,1}, + {loc,46,1,47,1,1}, + <<"">>}, + {[{attribute, + {anno,string, + {loc,36,1,37,1,1}, + {loc,42,1,43,1,1}, + <<"hidden">>}, + <<"hidden">>}], + []}}, + {text, + {anno,string,{loc,46,1,47,1,1},{loc,47,1,48,1,1},<<" ">>}, + undefined}, + {input, + {anno,string, + {loc,47,1,48,1,1}, + {loc,56,1,57,1,1}, + <<"">>}, + {[],[]}}, + {text, + {anno,string,{loc,56,1,57,1,1},{loc,57,1,58,1,1},<<" ">>}, + undefined}, + {'div', + {anno,string, + {loc,57,1,58,1,1}, + {loc,128,1,129,1,1}, + <<"">>}, + {[{attribute, + {anno,string, + {loc,62,1,63,1,1}, + {loc,70,1,71,1,1}, + <<"id=\"foo\"">>}, + {<<"id">>,<<"foo">>}}, + {attribute, + {anno,string, + {loc,71,1,72,1,1}, + {loc,86,1,87,1,1}, + <<"title='b\\'a\\'r'">>}, + {<<"title">>,<<"b\\'a\\'r">>}}, + {attribute, + {anno,string, + {loc,90,1,91,1,1}, + {loc,96,1,97,1,1}, + <<"hidden">>}, + <<"hidden">>}], + [{text, + {anno,string, + {loc,98,1,99,1,1}, + {loc,99,1,100,1,1}, + <<" ">>}, + undefined}, + {'div', + {anno,string, + {loc,99,1,100,1,1}, + {loc,121,1,122,1,1}, + <<"
vvv
">>}, + {[], + [{b, + {anno,string, + {loc,104,1,105,1,1}, + {loc,114,1,115,1,1}, + <<"vvv">>}, + {[], + [{text, + {anno,string, + {loc,107,1,108,1,1}, + {loc,110,1,111,1,1}, + <<"vvv">>}, + undefined}]}}, + {text, + {anno,string, + {loc,114,1,115,1,1}, + {loc,115,1,116,1,1}, + <<" ">>}, + undefined}]}}, + {text, + {anno,string, + {loc,121,1,122,1,1}, + {loc,122,1,123,1,1}, + <<" ">>}, + undefined}]}}, + {text, + {anno,string, + {loc,128,1,129,1,1}, + {loc,139,1,140,1,1}, + <<" Some test ">>}, + undefined}, + {title, + {anno,string, + {loc,139,1,140,1,1}, + {loc,197,1,198,1,1}, + <<"">>}, + {[{attribute, + {anno,string, + {loc,146,1,147,1,1}, + {loc,152,1,153,1,1}, + <<"hidden">>}, + <<"hidden">>}, + {attribute, + {anno,string, + {loc,157,1,158,1,1}, + {loc,165,1,166,1,1}, + <<"id='foo'">>}, + {<<"id">>,<<"foo">>}}, + {attribute, + {anno,string, + {loc,167,1,168,1,1}, + {loc,175,1,176,1,1}, + <<"required">>}, + <<"required">>}], + <<"AAA">>}}, + {text, + {anno,string, + {loc,197,1,198,1,1}, + {loc,201,1,202,1,1}, + <<" bar">>}, + undefined} ], scan_(?SLINE))} , { "Should scan multiple lines" , ?assertEqual([ - {text,{{{1,1},{2,1}},undefined},<<"\n">>}, - {special_tag, - {{{2,1},{6,10}},{[],<<"\n\n
Foo
\n\n">>}}, - <<"script">>}, - {text,{{{6,10},{7,1}},undefined},<<"\n">>}, - {elem_tag, - {{{7,1},{7,34}}, + {text, + {anno,string,{loc,0,1,1,1,1},{loc,1,2,1,1,1},<<"\n">>}, + undefined}, + {script, + {anno,string, + {loc,1,2,1,1,1}, + {loc,40,6,10,1,1}, + <<"">>}, + {[],<<"\n\n
Foo
\n\n">>}}, + {text, + {anno,string, + {loc,40,6,10,1,1}, + {loc,41,7,1,1,1}, + <<"\n">>}, + undefined}, + {span, + {anno,string, + {loc,41,7,1,1,1}, + {loc,74,7,34,1,1}, + <<" ooooo ">>}, {[], - [{text,{{{7,7},{7,10}},undefined},<<" ">>}, - {elem_tag, - {{{7,10},{7,26}}, - {[], - [{text, - {{{7,13},{7,22}},undefined}, - <<" ooooo ">>}]}}, - <<"i">>}, - {text,{{{7,26},{7,27}},undefined},<<" ">>}]}}, - <<"span">>}, - {text,{{{7,34},{10,1}},undefined},<<"\nbar\n\n">>}, - {special_tag, - {{{10,1},{14,21}}, + [{text, + {anno,string, + {loc,47,7,7,1,1}, + {loc,50,7,10,1,1}, + <<" ">>}, + undefined}, + {i,{anno,string, + {loc,50,7,10,1,1}, + {loc,66,7,26,1,1}, + <<" ooooo ">>}, + {[], + [{text, + {anno,string, + {loc,53,7,13,1,1}, + {loc,62,7,22,1,1}, + <<" ooooo ">>}, + undefined}]}}, + {text, + {anno,string, + {loc,66,7,26,1,1}, + {loc,67,7,27,1,1}, + <<" ">>}, + undefined}]}}, + {text, + {anno,string, + {loc,74,7,34,1,1}, + {loc,80,10,1,1,1}, + <<"\nbar\n\n">>}, + undefined}, + {title, + {anno,string, + {loc,80,10,1,1,1}, + {loc,120,14,21,1,1}, + <<"\n\n Title\n\n ">>}, {[],<<"\n\n Title\n\n ">>}}, - <<"title">>}, - {text,{{{14,21},{15,1}},undefined},<<"\n">>} + {text, + {anno,string, + {loc,120,14,21,1,1}, + {loc,121,15,1,1,1}, + <<"\n">>}, + undefined} ], scan_(?MLINE))} ]. diff --git a/src/bel_scan_eng_html5_attr.erl b/src/bel_scan_eng_html5_attr.erl index 52d0f92..2095117 100644 --- a/src/bel_scan_eng_html5_attr.erl +++ b/src/bel_scan_eng_html5_attr.erl @@ -60,18 +60,18 @@ handle_text(_Text, State) -> {halt, State}. 
% case double quote -handle_match({?MODULE, attribute, Text, [K, V], Loc}, State) -> - Token = bel_scan:token(attribute, Text, {K, V}, Loc), +handle_match({?MODULE, attribute, [K, V], Anno}, State) -> + Token = bel_scan:token(attribute, Anno, {K, V}), {reply, [Token], State}; % case single quote -handle_match({?MODULE, attribute, Text, [<<>>, <<>>, K, V], Loc}, State) -> - Token = bel_scan:token(attribute, Text, {K, V}, Loc), +handle_match({?MODULE, attribute, [<<>>, <<>>, K, V], Anno}, State) -> + Token = bel_scan:token(attribute, Anno, {K, V}), {reply, [Token], State}; % case attribute -handle_match({?MODULE, attribute, Text, [<<>>, <<>>, <<>>, <<>>, K], Loc}, State) -> - Token = bel_scan:token(attribute, Text, K, Loc), +handle_match({?MODULE, attribute, [<<>>, <<>>, <<>>, <<>>, K], Anno}, State) -> + Token = bel_scan:token(attribute, Anno, K), {reply, [Token], State}; -handle_match({Mod, _, _, _, _}, State) when Mod =/= ?MODULE -> +handle_match({Mod, _, _, _}, State) when Mod =/= ?MODULE -> {noreply, State}. 
handle_terminate(_Tokens, State) -> diff --git a/test/bel_scan_SUITE.erl b/test/bel_scan_SUITE.erl index a4dd0d3..2a7a962 100644 --- a/test/bel_scan_SUITE.erl +++ b/test/bel_scan_SUITE.erl @@ -140,9 +140,17 @@ bin(Config) when is_list(Config) -> SingleLnBin = <<"foo {{ {{A, b}, {0, \"C\"}} }} bar">>, [ - {text,{{{1,1},{1,5}},undefined},<<"foo ">>}, - {expr,{{{1,5},{1,29}},undefined},<<"{{A, b}, {0, \"C\"}}">>}, - {text,{{{1,29},{1,33}},undefined},<<" bar">>} + {text,{anno,string,{loc,0,1,1,1,1},{loc,4,1,5,1,1},<<"foo ">>}, + undefined}, + {expr,{anno,string, + {loc,4,1,5,1,1}, + {loc,28,1,29,1,1}, + <<"{{ {{A, b}, {0, \"C\"}} }}">>}, + [{tuple,1, + [{tuple,1,[{var,1,'A'},{atom,1,b}]}, + {tuple,1,[{integer,1,0},{string,1,"C"}]}]}]}, + {text,{anno,string,{loc,28,1,29,1,1},{loc,32,1,33,1,1},<<" bar">>}, + undefined} ] = bel_scan:get_tokens(bel_scan:bin(SingleLnBin, Opts)), MultiLnBin = <<"foo @@ -155,14 +163,38 @@ bin(Config) when is_list(Config) -> }} ">>, [ - {text,{{{1,1},{2,5}},undefined},<<"foo\n ">>}, - {expr,{{{2,5},{3,21}},undefined}, - <<"{{A, b},\n {0, \"C\"}}">>}, - {text,{{{3,21},{6,1}},undefined},<<"\n bar\n\n">>}, - {expr,{{{6,1},{6,20}},undefined},<<"{{ {{ d }} }}">>}, - {text,{{{6,20},{6,22}},undefined},<<" ">>}, - {expr,{{{6,22},{8,3}},undefined},<<"a">>}, - {text,{{{8,3},{9,1}},undefined},<<"\n">>} + {text,{anno,string, + {loc,0,1,1,1,1}, + {loc,8,2,5,1,1}, + <<"foo\n ">>}, + undefined}, + {expr,{anno,string, + {loc,8,2,5,1,1}, + {loc,40,3,21,1,1}, + <<"{{ {{A, b},\n {0, \"C\"}} }}">>}, + [{tuple,1, + [{tuple,1,[{var,1,'A'},{atom,1,b}]}, + {tuple,2,[{integer,2,0},{string,2,"C"}]}]}]}, + {text,{anno,string, + {loc,40,3,21,1,1}, + {loc,47,6,1,1,1}, + <<"\n bar\n\n">>}, + undefined}, + {expr,{anno,string, + {loc,47,6,1,1,1}, + {loc,66,6,20,1,1}, + <<"{{ {{ {{ d }} }} }}">>}, + [{tuple,1, + [{tuple,1,[{tuple,1,[{tuple,1,[{atom,1,d}]}]}]}]}]}, + {text,{anno,string,{loc,66,6,20,1,1},{loc,68,6,22,1,1},<<" ">>}, + undefined}, + {expr,{anno,string, + 
{loc,68,6,22,1,1}, + {loc,76,8,3,1,1}, + <<"{{ a\n\n}}">>}, + [{atom,1,a}]}, + {text,{anno,string,{loc,76,8,3,1,1},{loc,77,9,1,1,1},<<"\n">>}, + undefined} ] = bel_scan:get_tokens(bel_scan:bin(MultiLnBin, Opts)), ok. diff --git a/test/support/support_scan_eng.erl b/test/support/support_scan_eng.erl index ee5100a..f62d7a0 100644 --- a/test/support/support_scan_eng.erl +++ b/test/support/support_scan_eng.erl @@ -58,14 +58,12 @@ handle_text(_Text, State) -> {noreply, State}. % nested case -handle_match({?MODULE, expr, _Text, [Expr], Loc}, State) -> - Token = bel_scan:token(expr, Expr, Loc), - {reply, [Token], State}; +handle_match({?MODULE, expr, [Expr], Anno}, State) -> + {reply, [token(Anno, Expr)], State}; % simple case -handle_match({?MODULE, expr, _Text, [<<>>, Expr], Loc}, State) -> - Token = bel_scan:token(expr, Expr, Loc), - {reply, [Token], State}; -handle_match({Mod, _, _, _, _}, State) when Mod =/= ?MODULE -> +handle_match({?MODULE, expr, [<<>>, Expr], Anno}, State) -> + {reply, [token(Anno, Expr)], State}; +handle_match({Mod, _, _, _}, State) when Mod =/= ?MODULE -> {noreply, State}. handle_terminate(_Tokens, State) -> @@ -75,4 +73,10 @@ handle_terminate(_Tokens, State) -> %%% Internal functions %%%===================================================================== -% nothing here yet! +token(Anno, Expr) -> + bel_scan:token(expr, Anno, scan(Expr)). + +scan(Expr) -> + {ok, Tokens, _} = erl_scan:string(binary_to_list(<>)), + {ok, AST} = erl_parse:parse_exprs(Tokens), + AST. From df875709ddb21b0c55a8c0201a5166b94f0fb9c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Sun, 28 Apr 2024 09:52:14 -0300 Subject: [PATCH 21/25] feat(marker): permit regex be a string --- src/bel_scan_mark.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bel_scan_mark.erl b/src/bel_scan_mark.erl index 093e5ce..f2d81d8 100644 --- a/src/bel_scan_mark.erl +++ b/src/bel_scan_mark.erl @@ -44,6 +44,7 @@ -type t() :: #marker{}. 
-type id() :: atom(). -type re() :: binary() + | string() | {re_pattern, _, _, _, _} % re:mp/0 isn't exported. . @@ -59,7 +60,7 @@ compile(#marker{} = Marker) -> error({re, Reason}, [Marker]) end. -compile_re(RE) when is_binary(RE) -> +compile_re(RE) when is_binary(RE); is_list(RE) -> re:compile(RE, [anchored, multiline, ucp, {newline, anycrlf}]); compile_re(Pattern) when ?is_re_pattern(Pattern) -> {ok, Pattern}. From fa76b33809bec39cea93419fe29be88382ffa2d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Sun, 28 Apr 2024 09:53:07 -0300 Subject: [PATCH 22/25] feat(eel): change markers id and re --- src/bel_scan_eng_eel.erl | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/bel_scan_eng_eel.erl b/src/bel_scan_eng_eel.erl index 4442203..e81e48a 100644 --- a/src/bel_scan_eng_eel.erl +++ b/src/bel_scan_eng_eel.erl @@ -44,24 +44,28 @@ init(_Opts) -> #engine{ markers = [ #marker{ - id = inline, - re = <<"<%=\\s+((?:(?!<%).)*)\\s+\.%>">> + id = '<%=', + re = "<%=" }, #marker{ - id = start, - re = <<"<%=\\s+((?:(?!<%).)*)\\s+%>">> + id = '.%>', + re = "\\.\\s*%>" }, #marker{ - id = continue, - re = <<"<%\\s+((?:(?!<%).)*)\\s+%>">> + id = '<%', + re = "<%(?:(?!=))" }, #marker{ - id = terminate, - re = <<"<%\\s+((?:(?!<%).)*)\\s+\.%>">> + id = '%>', + re = "(?:(?!\\.))%>" }, #marker{ - id = comment, - re = <<"<%!--\s+((?:(?!<%).)*)\s+--%>">> + id = '<%!--', + re = "<%!--" + }, + #marker{ + id = '--%>', + re = "--%>" } ] }. @@ -72,9 +76,8 @@ handle_start(_Bin, State) -> handle_text(_Text, State) -> {noreply, State}. -handle_match({?MODULE, MarkerId, Captured, Anno}, State) -> - [Expr] = Captured, - Token = bel_scan:token(MarkerId, Anno, Expr), +handle_match({?MODULE, MarkerId, [], Anno}, State) -> + Token = bel_scan:token(MarkerId, Anno), {reply, [Token], State}; handle_match({Mod, _, _, _}, State) when Mod =/= ?MODULE -> {noreply, State}. 
From 320a6e12f9f187b94458ddc32549244798df1e12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Sun, 28 Apr 2024 10:11:06 -0300 Subject: [PATCH 23/25] refactor: check is_anno using a guard --- src/bel_scan.erl | 7 +++++-- src/bel_scan_anno.erl | 5 +---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 26c7798..4cf8a6e 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -75,6 +75,10 @@ -define(DEFAULT_OPTS, #{}). -define(DEFAULT_META, undefined). +-define(is_anno(X), ( + is_tuple(X) andalso element(1, X) =:= anno +)). + -record(state, { src :: src() , engines :: [{module(), engine()}] , bpart :: bpart() @@ -144,8 +148,7 @@ text_token(Text, Metadata, #state{} = State) -> token(Id, Anno) -> {Id, Anno, ?DEFAULT_META}. -token(Id, Anno, Metadata) when is_atom(Id) -> - true = bel_scan_anno:is_anno(Anno), +token(Id, Anno, Metadata) when is_atom(Id), ?is_anno(Anno) -> {Id, Anno, Metadata}. push_token(Token, #state{tokens = Tokens} = State) -> diff --git a/src/bel_scan_anno.erl b/src/bel_scan_anno.erl index f057faf..9b5ff4a 100644 --- a/src/bel_scan_anno.erl +++ b/src/bel_scan_anno.erl @@ -22,7 +22,7 @@ -module(bel_scan_anno). % API functions --export([ new/1, is_anno/1 ]). +-export([ new/1 ]). % State getters and setters functions -export([ get_src/1 @@ -65,9 +65,6 @@ new(Params) when is_map(Params) -> text = maps:get(text, Params) }. -is_anno(X) -> - is_record(X, anno). 
- %%%===================================================================== %%% State getters and setters functions %%%===================================================================== From 4f2add5cba09343e763a7faa5066f7927e71ca69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Mon, 29 Apr 2024 21:31:57 -0300 Subject: [PATCH 24/25] feat: token module --- src/bel_scan.erl | 73 +++----- src/bel_scan_anno.erl | 15 +- src/bel_scan_eng.erl | 2 +- src/bel_scan_eng_eel.erl | 290 +++++++++++++++++++++--------- src/bel_scan_eng_html5.erl | 259 +++++++++++++------------- src/bel_scan_eng_html5_attr.erl | 16 +- src/bel_scan_token.erl | 82 +++++++++ test/bel_scan_SUITE.erl | 69 ++++--- test/support/support_scan_eng.erl | 6 +- 9 files changed, 525 insertions(+), 287 deletions(-) create mode 100644 src/bel_scan_token.erl diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 4cf8a6e..e55ca83 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -28,14 +28,13 @@ , state/1 , state/2 , fold/2 + , init_engines/1 + , lookup_engine/2 , text_token/2 , text_token/3 - , token/2 - , token/3 , push_token/2 , push_tokens/2 - , init_engines/1 - , lookup_engine/2 + , yecc_tokens/1 ]). % State getters and setters functions @@ -58,13 +57,8 @@ , engine/0 , bpart/0 , loc/0 - , token/0 - , token_id/0 - , token_anno/0 - , token_metadata/0 - , token_loc/0 - , token_value/0 , pos/0 + , token/0 ]). -import(bel_scan_loc, [ new_ln/1, incr_col/2 ]). @@ -75,10 +69,6 @@ -define(DEFAULT_OPTS, #{}). -define(DEFAULT_META, undefined). --define(is_anno(X), ( - is_tuple(X) andalso element(1, X) =:= anno -)). - -record(state, { src :: src() , engines :: [{module(), engine()}] , bpart :: bpart() @@ -88,18 +78,13 @@ , init_pos :: pos() }). --opaque t() :: #state{}. --type src() :: bel_scan_anno:src(). --type engine() :: bel_scan_eng:t(). --type bpart() :: bel_scan_bpart:t(). --type loc() :: bel_scan_loc:t(). --type token() :: {token_id(), token_anno(), token_value()}. 
--type token_id() :: atom(). --type token_anno() :: {{token_loc(), token_loc()}, token_metadata()}. --type token_metadata() :: term(). --type token_loc() :: bel_scan_loc:pos_tuple(). --type token_value() :: binary(). --type pos() :: bel_scan_loc:pos(). +-opaque t() :: #state{}. +-type src() :: bel_scan_anno:src(). +-type engine() :: bel_scan_eng:t(). +-type bpart() :: bel_scan_bpart:t(). +-type loc() :: bel_scan_loc:t(). +-type pos() :: bel_scan_loc:pos(). +-type token() :: bel_scan_token:t(). %%%===================================================================== %%% API functions @@ -133,23 +118,26 @@ state(Bin, #state{bpart = BPart} = State) when is_binary(Bin) -> fold(#state{} = State, Funs) -> lists:foldl(fun(F, S) -> F(S) end, State, Funs). +init_engines(Modules) -> + [init_engine(Mod) || Mod <- Modules]. + +lookup_engine(Mod, #state{engines = Engines}) -> + proplists:lookup(Mod, Engines). + text_token(Text, State) -> text_token(Text, ?DEFAULT_META, State). text_token(Text, Metadata, #state{} = State) -> - Anno = bel_scan_anno:new(#{ - src => State#state.src, - loc => State#state.prev_loc, - end_loc => State#state.loc, - text => Text - }), - token(text, Anno, Metadata). - -token(Id, Anno) -> - {Id, Anno, ?DEFAULT_META}. - -token(Id, Anno, Metadata) when is_atom(Id), ?is_anno(Anno) -> - {Id, Anno, Metadata}. + bel_scan_token:new(#{ + id => text, + anno => bel_scan_anno:new(#{ + src => State#state.src, + loc => State#state.prev_loc, + end_loc => State#state.loc, + text => Text + }), + metadata => Metadata + }). push_token(Token, #state{tokens = Tokens} = State) -> State#state{tokens = Tokens ++ [Token]}. @@ -157,11 +145,8 @@ push_token(Token, #state{tokens = Tokens} = State) -> push_tokens(Tokens, State) when is_list(Tokens) -> lists:foldl(fun push_token/2, State, Tokens). -init_engines(Modules) -> - [init_engine(Mod) || Mod <- Modules]. - -lookup_engine(Mod, #state{engines = Engines}) -> - proplists:lookup(Mod, Engines). 
+yecc_tokens(#state{tokens = Tokens}) -> + [bel_scan_token:to_yecc(Token) || Token <- Tokens]. %%%===================================================================== %%% State getters and setters functions diff --git a/src/bel_scan_anno.erl b/src/bel_scan_anno.erl index 9b5ff4a..595aa4a 100644 --- a/src/bel_scan_anno.erl +++ b/src/bel_scan_anno.erl @@ -22,7 +22,7 @@ -module(bel_scan_anno). % API functions --export([ new/1 ]). +-export([ new/1, to_yecc/1 ]). % State getters and setters functions -export([ get_src/1 @@ -65,6 +65,19 @@ new(Params) when is_map(Params) -> text = maps:get(text, Params) }. +to_yecc(#anno{loc = Loc, text = Text, src = Src}) -> + Anno0 = erl_anno:new(bel_scan_loc:to_tuple(Loc)), + Anno = erl_anno:set_text(binary_to_list(Text), Anno0), + case Src of + {file, File} -> + erl_anno:set_file(File, Anno); + {module, Mod} -> + File = proplists:get_value(source, Mod:module_info(compile)), + erl_anno:set_file(File, Anno); + string -> + Anno + end. + %%%===================================================================== %%% State getters and setters functions %%%===================================================================== diff --git a/src/bel_scan_eng.erl b/src/bel_scan_eng.erl index 1fc9cd1..9e48091 100644 --- a/src/bel_scan_eng.erl +++ b/src/bel_scan_eng.erl @@ -49,7 +49,7 @@ -type t() :: #engine{}. -type scan() :: bel_scan:t(). -type marker_id() :: bel_scan_mark:id(). --type token() :: bel_scan:token(). +-type token() :: bel_scan_token:t(). -type loc() :: bel_scan_loc:t(). -type opts() :: term(). -type state() :: term(). diff --git a/src/bel_scan_eng_eel.erl b/src/bel_scan_eng_eel.erl index e81e48a..fc97d50 100644 --- a/src/bel_scan_eng_eel.erl +++ b/src/bel_scan_eng_eel.erl @@ -77,7 +77,7 @@ handle_text(_Text, State) -> {noreply, State}. 
handle_match({?MODULE, MarkerId, [], Anno}, State) -> - Token = bel_scan:token(MarkerId, Anno), + Token = bel_scan_token:new(#{id => MarkerId, anno => Anno}), {reply, [Token], State}; handle_match({Mod, _, _, _}, State) when Mod =/= ?MODULE -> {noreply, State}. @@ -129,93 +129,213 @@ scan_(Bin) -> scan_test() -> [ { "Should scan single line" , ?assertEqual([ - {text,{anno,string,{loc,0,1,1,1,1},{loc,2,1,3,1,1},<<"a ">>}, - undefined}, - {inline,{anno,string, - {loc,2,1,3,1,1}, - {loc,11,1,12,1,1}, - <<"<%= b .%>">>}, - <<"b">>}, - {text,{anno,string, - {loc,11,1,12,1,1}, - {loc,14,1,15,1,1}, - <<" c ">>}, - undefined}, - {start,{anno,string, - {loc,14,1,15,1,1}, - {loc,22,1,23,1,1}, - <<"<%= d %>">>}, - <<"d">>}, - {text,{anno,string, - {loc,22,1,23,1,1}, - {loc,25,1,26,1,1}, - <<" e ">>}, - undefined}, - {continue,{anno,string, - {loc,25,1,26,1,1}, - {loc,32,1,33,1,1}, - <<"<% f %>">>}, - <<"f">>}, - {text,{anno,string, - {loc,32,1,33,1,1}, - {loc,35,1,36,1,1}, - <<" g ">>}, - undefined}, - {terminate,{anno,string, - {loc,35,1,36,1,1}, - {loc,43,1,44,1,1}, - <<"<% h .%>">>}, - <<"h">>}, - {text,{anno,string, - {loc,43,1,44,1,1}, - {loc,45,1,46,1,1}, - <<" i">>}, - undefined} + {token,text, + {anno,string, + {loc,0,1,1,1,1}, + {loc,2,1,3,1,1}, + <<"a ">>}, + undefined}, + {token,'<%=', + {anno,string, + {loc,2,1,3,1,1}, + {loc,5,1,6,1,1}, + <<"<%=">>}, + undefined}, + {token,text, + {anno,string, + {loc,5,1,6,1,1}, + {loc,8,1,9,1,1}, + <<" b ">>}, + undefined}, + {token,'.%>', + {anno,string, + {loc,8,1,9,1,1}, + {loc,11,1,12,1,1}, + <<".%>">>}, + undefined}, + {token,text, + {anno,string, + {loc,11,1,12,1,1}, + {loc,14,1,15,1,1}, + <<" c ">>}, + undefined}, + {token,'<%=', + {anno,string, + {loc,14,1,15,1,1}, + {loc,17,1,18,1,1}, + <<"<%=">>}, + undefined}, + {token,text, + {anno,string, + {loc,17,1,18,1,1}, + {loc,20,1,21,1,1}, + <<" d ">>}, + undefined}, + {token,'%>', + {anno,string, + {loc,20,1,21,1,1}, + {loc,22,1,23,1,1}, + <<"%>">>}, + undefined}, + 
{token,text, + {anno,string, + {loc,22,1,23,1,1}, + {loc,25,1,26,1,1}, + <<" e ">>}, + undefined}, + {token,'<%', + {anno,string, + {loc,25,1,26,1,1}, + {loc,27,1,28,1,1}, + <<"<%">>}, + undefined}, + {token,text, + {anno,string, + {loc,27,1,28,1,1}, + {loc,30,1,31,1,1}, + <<" f ">>}, + undefined}, + {token,'%>', + {anno,string, + {loc,30,1,31,1,1}, + {loc,32,1,33,1,1}, + <<"%>">>}, + undefined}, + {token,text, + {anno,string, + {loc,32,1,33,1,1}, + {loc,35,1,36,1,1}, + <<" g ">>}, + undefined}, + {token,'<%', + {anno,string, + {loc,35,1,36,1,1}, + {loc,37,1,38,1,1}, + <<"<%">>}, + undefined}, + {token,text, + {anno,string, + {loc,37,1,38,1,1}, + {loc,40,1,41,1,1}, + <<" h ">>}, + undefined}, + {token,'.%>', + {anno,string, + {loc,40,1,41,1,1}, + {loc,43,1,44,1,1}, + <<".%>">>}, + undefined}, + {token,text, + {anno,string, + {loc,43,1,44,1,1}, + {loc,45,1,46,1,1}, + <<" i">>}, + undefined} ], scan_(?SLINE))} , { "Should scan multiple lines" , ?assertEqual([ - {text,{anno,string,{loc,0,1,1,1,1},{loc,2,1,3,1,1},<<"a ">>}, - undefined}, - {inline,{anno,string, - {loc,2,1,3,1,1}, - {loc,11,2,4,1,1}, - <<"<%= b\n.%>">>}, - <<"b">>}, - {text,{anno,string, - {loc,11,2,4,1,1}, - {loc,14,2,7,1,1}, - <<" c ">>}, - undefined}, - {start,{anno,string, - {loc,14,2,7,1,1}, - {loc,22,3,5,1,1}, - <<"<%=\nd %>">>}, - <<"d">>}, - {text,{anno,string, - {loc,22,3,5,1,1}, - {loc,25,4,1,1,1}, - <<" e\n">>}, - undefined}, - {continue,{anno,string, - {loc,25,4,1,1,1}, - {loc,33,6,3,1,1}, - <<"<% f\n\n%>">>}, - <<"f">>}, - {text,{anno,string, - {loc,33,6,3,1,1}, - {loc,41,10,1,1,1}, - <<"\n\n g\n\n">>}, - undefined}, - {terminate,{anno,string, - {loc,41,10,1,1,1}, - {loc,60,14,13,1,1}, - <<"<%\n\nh\n\n .%>">>}, - <<"h">>}, - {text,{anno,string, - {loc,60,14,13,1,1}, - {loc,65,18,1,1,1}, - <<"\n\ni\n\n">>}, - undefined} + {token,text, + {anno,string, + {loc,0,1,1,1,1}, + {loc,2,1,3,1,1}, + <<"a ">>}, + undefined}, + {token,'<%=', + {anno,string, + {loc,2,1,3,1,1}, + {loc,5,1,6,1,1}, + 
<<"<%=">>}, + undefined}, + {token,text, + {anno,string, + {loc,5,1,6,1,1}, + {loc,8,2,1,1,1}, + <<" b\n">>}, + undefined}, + {token,'.%>', + {anno,string, + {loc,8,2,1,1,1}, + {loc,11,2,4,1,1}, + <<".%>">>}, + undefined}, + {token,text, + {anno,string, + {loc,11,2,4,1,1}, + {loc,14,2,7,1,1}, + <<" c ">>}, + undefined}, + {token,'<%=', + {anno,string, + {loc,14,2,7,1,1}, + {loc,17,2,10,1,1}, + <<"<%=">>}, + undefined}, + {token,text, + {anno,string, + {loc,17,2,10,1,1}, + {loc,20,3,3,1,1}, + <<"\nd ">>}, + undefined}, + {token,'%>', + {anno,string, + {loc,20,3,3,1,1}, + {loc,22,3,5,1,1}, + <<"%>">>}, + undefined}, + {token,text, + {anno,string, + {loc,22,3,5,1,1}, + {loc,25,4,1,1,1}, + <<" e\n">>}, + undefined}, + {token,'<%', + {anno,string, + {loc,25,4,1,1,1}, + {loc,27,4,3,1,1}, + <<"<%">>}, + undefined}, + {token,text, + {anno,string, + {loc,27,4,3,1,1}, + {loc,31,6,1,1,1}, + <<" f\n\n">>}, + undefined}, + {token,'%>', + {anno,string, + {loc,31,6,1,1,1}, + {loc,33,6,3,1,1}, + <<"%>">>}, + undefined}, + {token,text, + {anno,string, + {loc,33,6,3,1,1}, + {loc,41,10,1,1,1}, + <<"\n\n g\n\n">>}, + undefined}, + {token,'<%', + {anno,string, + {loc,41,10,1,1,1}, + {loc,43,10,3,1,1}, + <<"<%">>}, + undefined}, + {token,text, + {anno,string, + {loc,43,10,3,1,1}, + {loc,57,14,10,1,1}, + <<"\n\nh\n\n ">>}, + undefined}, + {token,'.%>', + {anno,string, + {loc,57,14,10,1,1}, + {loc,60,14,13,1,1}, + <<".%>">>}, + undefined}, + {token,text, + {anno,string, + {loc,60,14,13,1,1}, + {loc,65,18,1,1,1}, + <<"\n\ni\n\n">>}, + undefined} ], scan_(?MLINE))} ]. diff --git a/src/bel_scan_eng_html5.erl b/src/bel_scan_eng_html5.erl index 6bbdcea..e9a0547 100644 --- a/src/bel_scan_eng_html5.erl +++ b/src/bel_scan_eng_html5.erl @@ -111,7 +111,11 @@ handle_text(_Text, Scan) -> {noreply, Scan}. 
handle_match({?MODULE, doctype, [], Anno}, Scan) -> - Token = bel_scan:token(doctype, Anno, <<"html">>), + Token = bel_scan_token:new(#{ + id => doctype, + anno => Anno, + metadata => <<"html">> + }), {reply, [Token], Scan}; handle_match({?MODULE, special_tag, Captured, Anno}, Scan) -> [OAngB, Tag, Attrs, _CAngB, Content, _CTag] = Captured, @@ -119,7 +123,11 @@ handle_match({?MODULE, special_tag, Captured, Anno}, Scan) -> attributes(Attrs, [OAngB, Tag], Anno, Scan), Content }, - Token = bel_scan:token(binary_to_atom(Tag), Anno, Metadata), + Token = bel_scan_token:new(#{ + id => binary_to_atom(Tag), + anno => Anno, + metadata => Metadata + }), {reply, [Token], Scan}; handle_match({?MODULE, void_tag, Captured, Anno}, Scan) -> [OAngB, Tag, Attrs, _CAngB] = Captured, @@ -127,7 +135,11 @@ handle_match({?MODULE, void_tag, Captured, Anno}, Scan) -> attributes(Attrs, [OAngB, Tag], Anno, Scan), [] }, - Token = bel_scan:token(binary_to_atom(Tag), Anno, Metadata), + Token = bel_scan_token:new(#{ + id => binary_to_atom(Tag), + anno => Anno, + metadata => Metadata + }), {reply, [Token], Scan}; handle_match({?MODULE, elem_tag, Captured, Anno}, Scan) -> [OAngB, Tag, Attrs, CAngB, ChildNodes, _CTag] = Captured, @@ -135,7 +147,11 @@ handle_match({?MODULE, elem_tag, Captured, Anno}, Scan) -> attributes(Attrs, [OAngB, Tag], Anno, Scan), child_nodes(ChildNodes, [OAngB, Tag, Attrs, CAngB], Anno, Scan) }, - Token = bel_scan:token(binary_to_atom(Tag), Anno, Metadata), + Token = bel_scan_token:new(#{ + id => binary_to_atom(Tag), + anno => Anno, + metadata => Metadata + }), {reply, [Token], Scan}; handle_match({Mod, _, _, _}, Scan) when Mod =/= ?MODULE -> {noreply, Scan}. 
@@ -203,200 +219,201 @@ scan_(Bin) -> scan_test() -> [ { "Should scan single line" , ?assertEqual([ - {text, - {anno,string,{loc,0,1,1,1,1},{loc,5,1,6,1,1},<<" ">>}, - undefined}, - {doctype, - {anno,string, - {loc,5,1,6,1,1}, - {loc,20,1,21,1,1}, - <<"">>}, - <<"html">>}, - {text, - {anno,string, - {loc,20,1,21,1,1}, - {loc,29,1,30,1,1}, - <<" Lalala ">>}, - undefined}, - {area, - {anno,string, - {loc,29,1,30,1,1}, - {loc,46,1,47,1,1}, - <<"">>}, - {[{attribute, + {token,text, + {anno,string,{loc,0,1,1,1,1},{loc,5,1,6,1,1},<<" ">>}, + undefined}, + {token,doctype, + {anno,string, + {loc,5,1,6,1,1}, + {loc,20,1,21,1,1}, + <<"">>}, + <<"html">>}, + {token,text, + {anno,string, + {loc,20,1,21,1,1}, + {loc,29,1,30,1,1}, + <<" Lalala ">>}, + undefined}, + {token,area, + {anno,string, + {loc,29,1,30,1,1}, + {loc,46,1,47,1,1}, + <<"">>}, + {[{token,attribute, {anno,string, - {loc,36,1,37,1,1}, - {loc,42,1,43,1,1}, - <<"hidden">>}, + {loc,36,1,37,1,1}, + {loc,42,1,43,1,1}, + <<"hidden">>}, <<"hidden">>}], - []}}, - {text, - {anno,string,{loc,46,1,47,1,1},{loc,47,1,48,1,1},<<" ">>}, - undefined}, - {input, - {anno,string, - {loc,47,1,48,1,1}, - {loc,56,1,57,1,1}, - <<"">>}, - {[],[]}}, - {text, - {anno,string,{loc,56,1,57,1,1},{loc,57,1,58,1,1},<<" ">>}, - undefined}, - {'div', + []}}, + {token,text, + {anno,string,{loc,46,1,47,1,1},{loc,47,1,48,1,1},<<" ">>}, + undefined}, + {token,input, + {anno,string, + {loc,47,1,48,1,1}, + {loc,56,1,57,1,1}, + <<"">>}, + {[],[]}}, + {token,text, + {anno,string,{loc,56,1,57,1,1},{loc,57,1,58,1,1},<<" ">>}, + undefined}, + {token,'div', + {anno,string, + {loc,57,1,58,1,1}, + {loc,128,1,129,1,1}, + <<"">>}, + {[{token,attribute, {anno,string, - {loc,57,1,58,1,1}, - {loc,128,1,129,1,1}, - <<"">>}, - {[{attribute, - {anno,string, - {loc,62,1,63,1,1}, - {loc,70,1,71,1,1}, - <<"id=\"foo\"">>}, + {loc,62,1,63,1,1}, + {loc,70,1,71,1,1}, + <<"id=\"foo\"">>}, {<<"id">>,<<"foo">>}}, - {attribute, + {token,attribute, {anno,string, - 
{loc,71,1,72,1,1}, - {loc,86,1,87,1,1}, - <<"title='b\\'a\\'r'">>}, + {loc,71,1,72,1,1}, + {loc,86,1,87,1,1}, + <<"title='b\\'a\\'r'">>}, {<<"title">>,<<"b\\'a\\'r">>}}, - {attribute, + {token,attribute, {anno,string, - {loc,90,1,91,1,1}, - {loc,96,1,97,1,1}, - <<"hidden">>}, + {loc,90,1,91,1,1}, + {loc,96,1,97,1,1}, + <<"hidden">>}, <<"hidden">>}], - [{text, + [{token,text, {anno,string, - {loc,98,1,99,1,1}, - {loc,99,1,100,1,1}, - <<" ">>}, + {loc,98,1,99,1,1}, + {loc,99,1,100,1,1}, + <<" ">>}, undefined}, - {'div', + {token,'div', {anno,string, - {loc,99,1,100,1,1}, - {loc,121,1,122,1,1}, - <<"
vvv
">>}, + {loc,99,1,100,1,1}, + {loc,121,1,122,1,1}, + <<"
vvv
">>}, {[], - [{b, + [{token,b, {anno,string, {loc,104,1,105,1,1}, {loc,114,1,115,1,1}, <<"vvv">>}, {[], - [{text, - {anno,string, + [{token,text, + {anno,string, {loc,107,1,108,1,1}, {loc,110,1,111,1,1}, <<"vvv">>}, - undefined}]}}, - {text, + undefined}]}}, + {token,text, {anno,string, {loc,114,1,115,1,1}, {loc,115,1,116,1,1}, <<" ">>}, undefined}]}}, - {text, + {token,text, {anno,string, - {loc,121,1,122,1,1}, - {loc,122,1,123,1,1}, - <<" ">>}, + {loc,121,1,122,1,1}, + {loc,122,1,123,1,1}, + <<" ">>}, undefined}]}}, - {text, - {anno,string, - {loc,128,1,129,1,1}, - {loc,139,1,140,1,1}, - <<" Some test ">>}, - undefined}, - {title, + {token,text, + {anno,string, + {loc,128,1,129,1,1}, + {loc,139,1,140,1,1}, + <<" Some test ">>}, + undefined}, + {token,title, + {anno,string, + {loc,139,1,140,1,1}, + {loc,197,1,198,1,1}, + <<"">>}, + {[{token,attribute, {anno,string, - {loc,139,1,140,1,1}, - {loc,197,1,198,1,1}, - <<"">>}, - {[{attribute, - {anno,string, - {loc,146,1,147,1,1}, - {loc,152,1,153,1,1}, - <<"hidden">>}, + {loc,146,1,147,1,1}, + {loc,152,1,153,1,1}, + <<"hidden">>}, <<"hidden">>}, - {attribute, + {token,attribute, {anno,string, - {loc,157,1,158,1,1}, - {loc,165,1,166,1,1}, - <<"id='foo'">>}, + {loc,157,1,158,1,1}, + {loc,165,1,166,1,1}, + <<"id='foo'">>}, {<<"id">>,<<"foo">>}}, - {attribute, + {token,attribute, {anno,string, - {loc,167,1,168,1,1}, - {loc,175,1,176,1,1}, - <<"required">>}, + {loc,167,1,168,1,1}, + {loc,175,1,176,1,1}, + <<"required">>}, <<"required">>}], - <<"AAA">>}}, - {text, - {anno,string, - {loc,197,1,198,1,1}, - {loc,201,1,202,1,1}, - <<" bar">>}, - undefined} + <<"AAA">>}}, + {token,text, + {anno,string, + {loc,197,1,198,1,1}, + {loc,201,1,202,1,1}, + <<" bar">>}, + undefined} ], scan_(?SLINE))} , { "Should scan multiple lines" , ?assertEqual([ - {text, + {token,text, {anno,string,{loc,0,1,1,1,1},{loc,1,2,1,1,1},<<"\n">>}, undefined}, - {script, + {token,script, {anno,string, {loc,1,2,1,1,1}, {loc,40,6,10,1,1}, <<"">>}, {[],<<"\n\n
Foo
\n\n">>}}, - {text, + {token,text, {anno,string, {loc,40,6,10,1,1}, {loc,41,7,1,1,1}, <<"\n">>}, undefined}, - {span, + {token,span, {anno,string, {loc,41,7,1,1,1}, {loc,74,7,34,1,1}, <<" ooooo ">>}, {[], - [{text, + [{token,text, {anno,string, {loc,47,7,7,1,1}, {loc,50,7,10,1,1}, <<" ">>}, undefined}, - {i,{anno,string, + {token,i, + {anno,string, {loc,50,7,10,1,1}, {loc,66,7,26,1,1}, <<" ooooo ">>}, {[], - [{text, - {anno,string, - {loc,53,7,13,1,1}, - {loc,62,7,22,1,1}, - <<" ooooo ">>}, - undefined}]}}, - {text, + [{token,text, + {anno,string, + {loc,53,7,13,1,1}, + {loc,62,7,22,1,1}, + <<" ooooo ">>}, + undefined}]}}, + {token,text, {anno,string, {loc,66,7,26,1,1}, {loc,67,7,27,1,1}, <<" ">>}, undefined}]}}, - {text, + {token,text, {anno,string, {loc,74,7,34,1,1}, {loc,80,10,1,1,1}, <<"\nbar\n\n">>}, undefined}, - {title, + {token,title, {anno,string, {loc,80,10,1,1,1}, {loc,120,14,21,1,1}, <<"\n\n Title\n\n ">>}, {[],<<"\n\n Title\n\n ">>}}, - {text, + {token,text, {anno,string, {loc,120,14,21,1,1}, {loc,121,15,1,1,1}, diff --git a/src/bel_scan_eng_html5_attr.erl b/src/bel_scan_eng_html5_attr.erl index 2095117..7a8a83e 100644 --- a/src/bel_scan_eng_html5_attr.erl +++ b/src/bel_scan_eng_html5_attr.erl @@ -61,16 +61,13 @@ handle_text(_Text, State) -> % case double quote handle_match({?MODULE, attribute, [K, V], Anno}, State) -> - Token = bel_scan:token(attribute, Anno, {K, V}), - {reply, [Token], State}; + {reply, [attribute_token(Anno, {K, V})], State}; % case single quote handle_match({?MODULE, attribute, [<<>>, <<>>, K, V], Anno}, State) -> - Token = bel_scan:token(attribute, Anno, {K, V}), - {reply, [Token], State}; + {reply, [attribute_token(Anno, {K, V})], State}; % case attribute handle_match({?MODULE, attribute, [<<>>, <<>>, <<>>, <<>>, K], Anno}, State) -> - Token = bel_scan:token(attribute, Anno, K), - {reply, [Token], State}; + {reply, [attribute_token(Anno, K)], State}; handle_match({Mod, _, _, _}, State) when Mod =/= ?MODULE -> {noreply, State}. 
@@ -81,4 +78,9 @@ handle_terminate(_Tokens, State) -> %%% Internal functions %%%===================================================================== -% nothing here yet! +attribute_token(Anno, Metadata) -> + bel_scan_token:new(#{ + id => attribute, + anno => Anno, + metadata => Metadata + }). diff --git a/src/bel_scan_token.erl b/src/bel_scan_token.erl new file mode 100644 index 0000000..ee5ca7e --- /dev/null +++ b/src/bel_scan_token.erl @@ -0,0 +1,82 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc Token module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_token). + +% API functions +-export([ new/1, to_yecc/1 ]). + +% State getters and setters functions +-export([ get_id/1 + , set_id/2 + , get_anno/1 + , set_anno/2 + , get_metadata/1 + , set_metadata/2 + ]). + +-export_type([ t/0, id/0, anno/0, metadata/0 ]). + +-record(token, { id :: id() + , anno :: anno() + , metadata :: metadata() + }). + +-opaque t() :: #token{}. +-type id() :: atom(). +-type anno() :: bel_scan_anno:t(). +-type metadata() :: term(). 
+ +%%%===================================================================== +%%% API functions +%%%===================================================================== + +new(Params) when is_map(Params) -> + #token{ + id = maps:get(id, Params), + anno = maps:get(anno, Params), + metadata = maps:get(metadata, Params, undefined) + }. + +to_yecc(#token{id = Id, anno = Anno, metadata = Metadata}) -> + {Id, bel_scan_anno:to_yecc(Anno), Metadata}. + +%%%===================================================================== +%%% State getters and setters functions +%%%===================================================================== + +get_id(#token{id = Id}) -> + Id. + +set_id(Id, #token{} = Token) -> + Token#token{id = Id}. + +get_anno(#token{anno = Anno}) -> + Anno. + +set_anno(Anno, #token{} = Token) -> + Token#token{anno = Anno}. + +get_metadata(#token{metadata = Metadata}) -> + Metadata. + +set_metadata(Metadata, #token{} = Token) -> + Token#token{metadata = Metadata}. diff --git a/test/bel_scan_SUITE.erl b/test/bel_scan_SUITE.erl index 2a7a962..55744ee 100644 --- a/test/bel_scan_SUITE.erl +++ b/test/bel_scan_SUITE.erl @@ -22,6 +22,7 @@ -module(bel_scan_SUITE). % -include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). 
%% Callback functions -export([ suite/0 @@ -139,19 +140,25 @@ bin(Config) when is_list(Config) -> }, SingleLnBin = <<"foo {{ {{A, b}, {0, \"C\"}} }} bar">>, - [ - {text,{anno,string,{loc,0,1,1,1,1},{loc,4,1,5,1,1},<<"foo ">>}, - undefined}, - {expr,{anno,string, + ?assertEqual([ + {token,text, + {anno,string,{loc,0,1,1,1,1},{loc,4,1,5,1,1},<<"foo ">>}, + undefined}, + {token,expr, + {anno,string, {loc,4,1,5,1,1}, {loc,28,1,29,1,1}, <<"{{ {{A, b}, {0, \"C\"}} }}">>}, - [{tuple,1, - [{tuple,1,[{var,1,'A'},{atom,1,b}]}, + [{tuple,1, + [{tuple,1,[{var,1,'A'},{atom,1,b}]}, {tuple,1,[{integer,1,0},{string,1,"C"}]}]}]}, - {text,{anno,string,{loc,28,1,29,1,1},{loc,32,1,33,1,1},<<" bar">>}, - undefined} - ] = bel_scan:get_tokens(bel_scan:bin(SingleLnBin, Opts)), + {token,text, + {anno,string, + {loc,28,1,29,1,1}, + {loc,32,1,33,1,1}, + <<" bar">>}, + undefined} + ], bel_scan:get_tokens(bel_scan:bin(SingleLnBin, Opts))), MultiLnBin = <<"foo {{ {{A, b}, @@ -162,40 +169,48 @@ bin(Config) when is_list(Config) -> }} ">>, - [ - {text,{anno,string, + ?assertEqual([ + {token,text, + {anno,string, {loc,0,1,1,1,1}, {loc,8,2,5,1,1}, <<"foo\n ">>}, - undefined}, - {expr,{anno,string, + undefined}, + {token,expr, + {anno,string, {loc,8,2,5,1,1}, {loc,40,3,21,1,1}, <<"{{ {{A, b},\n {0, \"C\"}} }}">>}, - [{tuple,1, - [{tuple,1,[{var,1,'A'},{atom,1,b}]}, + [{tuple,1, + [{tuple,1,[{var,1,'A'},{atom,1,b}]}, {tuple,2,[{integer,2,0},{string,2,"C"}]}]}]}, - {text,{anno,string, + {token,text, + {anno,string, {loc,40,3,21,1,1}, {loc,47,6,1,1,1}, <<"\n bar\n\n">>}, - undefined}, - {expr,{anno,string, + undefined}, + {token,expr, + {anno,string, {loc,47,6,1,1,1}, {loc,66,6,20,1,1}, <<"{{ {{ {{ d }} }} }}">>}, - [{tuple,1, - [{tuple,1,[{tuple,1,[{tuple,1,[{atom,1,d}]}]}]}]}]}, - {text,{anno,string,{loc,66,6,20,1,1},{loc,68,6,22,1,1},<<" ">>}, - undefined}, - {expr,{anno,string, + [{tuple,1, + [{tuple,1, + [{tuple,1,[{tuple,1,[{atom,1,d}]}]}]}]}]}, + {token,text, + 
{anno,string,{loc,66,6,20,1,1},{loc,68,6,22,1,1},<<" ">>}, + undefined}, + {token,expr, + {anno,string, {loc,68,6,22,1,1}, {loc,76,8,3,1,1}, <<"{{ a\n\n}}">>}, - [{atom,1,a}]}, - {text,{anno,string,{loc,76,8,3,1,1},{loc,77,9,1,1,1},<<"\n">>}, - undefined} - ] = bel_scan:get_tokens(bel_scan:bin(MultiLnBin, Opts)), + [{atom,1,a}]}, + {token,text, + {anno,string,{loc,76,8,3,1,1},{loc,77,9,1,1,1},<<"\n">>}, + undefined} + ], bel_scan:get_tokens(bel_scan:bin(MultiLnBin, Opts))), ok. diff --git a/test/support/support_scan_eng.erl b/test/support/support_scan_eng.erl index f62d7a0..8f90fa8 100644 --- a/test/support/support_scan_eng.erl +++ b/test/support/support_scan_eng.erl @@ -74,7 +74,11 @@ handle_terminate(_Tokens, State) -> %%%===================================================================== token(Anno, Expr) -> - bel_scan:token(expr, Anno, scan(Expr)). + bel_scan_token:new(#{ + id => expr, + anno => Anno, + metadata => scan(Expr) + }). scan(Expr) -> {ok, Tokens, _} = erl_scan:string(binary_to_list(<>)), From 6393e8bd5ebd4d5e3c6c264286b112c2164cf55b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Fank=20Thom=C3=A9?= Date: Tue, 30 Apr 2024 15:54:16 -0300 Subject: [PATCH 25/25] feat: small fixes and implement eel attribute --- src/bel_scan.erl | 3 +- src/bel_scan_anno.erl | 2 +- src/bel_scan_eng_eel.erl | 496 ++++++++++++++++-------------- src/bel_scan_eng_eel_attr.erl | 106 +++++++ src/bel_scan_eng_html5.erl | 394 +++++++++++++----------- src/bel_scan_eng_html5_attr.erl | 5 +- src/bel_scan_mark.erl | 3 +- src/bel_scan_token.erl | 23 +- test/bel_scan_SUITE.erl | 25 +- test/support/support_scan_eng.erl | 3 +- 10 files changed, 628 insertions(+), 432 deletions(-) create mode 100644 src/bel_scan_eng_eel_attr.erl diff --git a/src/bel_scan.erl b/src/bel_scan.erl index e55ca83..134a7b2 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -136,7 +136,8 @@ text_token(Text, Metadata, #state{} = State) -> end_loc => State#state.loc, text => Text }), - metadata => 
Metadata + metadata => Metadata, + engine => ?MODULE }). push_token(Token, #state{tokens = Tokens} = State) -> diff --git a/src/bel_scan_anno.erl b/src/bel_scan_anno.erl index 595aa4a..7c216bd 100644 --- a/src/bel_scan_anno.erl +++ b/src/bel_scan_anno.erl @@ -43,7 +43,7 @@ , text :: text() }). --opaque t() :: #anno{}. +-type t() :: #anno{}. -type src() :: {file, file:filename_all()} | {module, module()} | string diff --git a/src/bel_scan_eng_eel.erl b/src/bel_scan_eng_eel.erl index fc97d50..af32761 100644 --- a/src/bel_scan_eng_eel.erl +++ b/src/bel_scan_eng_eel.erl @@ -30,12 +30,27 @@ , handle_terminate/2 ]). +% Support functions +-export([ expr_token/3 + , expr_inline_id/0 + , expr_start_id/0 + , expr_continue_id/0 + , expr_end_id/0 + , comment_id/0 + ]). + -include("bel_scan_eng.hrl"). -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -endif. +-define(EXPR_INLINE_ID, eel_expr_inline). +-define(EXPR_START_ID, eel_expr_start). +-define(EXPR_CONTINUE_ID, eel_expr_continue). +-define(EXPR_END_ID, eel_expr_end). +-define(EXPR_COMMENT_ID, eel_comment). 
+ %%%===================================================================== %%% bel_scan_eng callback functions %%%===================================================================== @@ -44,46 +59,99 @@ init(_Opts) -> #engine{ markers = [ #marker{ - id = '<%=', - re = "<%=" + id = expr_inline_id(), + re = "<%=\\s+((?:(?!<%).)*)\\s+\.%>" }, #marker{ - id = '.%>', - re = "\\.\\s*%>" + id = expr_start_id(), + re = "<%=\\s+((?:(?!<%).)*)\\s+%>" }, #marker{ - id = '<%', - re = "<%(?:(?!=))" + id = expr_continue_id(), + re = "<%\\s+((?:(?!<%).)*)\\s+%>" }, #marker{ - id = '%>', - re = "(?:(?!\\.))%>" + id = expr_end_id(), + re = "<%\\s+((?:(?!<%).)*)\\s+\.%>" }, #marker{ - id = '<%!--', - re = "<%!--" - }, - #marker{ - id = '--%>', - re = "--%>" + id = comment_id(), + re = "<%!--\s+((?:(?!<%).)*)\s+--%>" } + % TODO: Use "simple" markers and use a parser (yecc) to + % spot issues, like missing ending marker, e.g.: + % > "<%= case Bool of true -> %>ok<% end %>" <- Missing ".%>" + % #marker{ + % id = '<%=', + % re = "<%=" + % }, + % #marker{ + % id = '.%>', + % re = "\\.\\s*%>" + % }, + % #marker{ + % id = '<%', + % re = "<%(?:(?!=))" + % }, + % #marker{ + % id = '%>', + % re = "(?:(?!\\.))%>" + % }, + % #marker{ + % id = '<%!--', + % re = "<%!--" + % }, + % #marker{ + % id = '--%>', + % re = "--%>" + % } ] }. -handle_start(_Bin, State) -> - {noreply, State}. +handle_start(_Bin, Scan) -> + {noreply, Scan}. -handle_text(_Text, State) -> - {noreply, State}. +handle_text(_Text, Scan) -> + {noreply, Scan}. -handle_match({?MODULE, MarkerId, [], Anno}, State) -> - Token = bel_scan_token:new(#{id => MarkerId, anno => Anno}), - {reply, [Token], State}; -handle_match({Mod, _, _, _}, State) when Mod =/= ?MODULE -> - {noreply, State}. +handle_match({?MODULE, eel_comment, _Captured, _Anno}, Scan) -> + % TODO: Maybe push a comment token. 
+ {noreply, Scan}; +handle_match({?MODULE, MarkerId, [Expr], Anno}, Scan) -> + Token = expr_token(MarkerId, Anno, Expr), + {reply, [Token], Scan}; +handle_match({Mod, _, _, _}, Scan) when Mod =/= ?MODULE -> + {noreply, Scan}. -handle_terminate(_Tokens, State) -> - {noreply, State}. +handle_terminate(_Tokens, Scan) -> + {noreply, Scan}. + +%%%===================================================================== +%%% Support functions +%%%===================================================================== + +expr_token(Id, Anno, Expr) -> + bel_scan_token:new(#{ + id => Id, + anno => Anno, + engine => ?MODULE, + metadata => Expr + }). + +expr_inline_id() -> + ?EXPR_INLINE_ID. + +expr_start_id() -> + ?EXPR_START_ID. + +expr_continue_id() -> + ?EXPR_CONTINUE_ID. + +expr_end_id() -> + ?EXPR_END_ID. + +comment_id() -> + ?EXPR_COMMENT_ID. %%%===================================================================== %%% Internal functions @@ -124,218 +192,194 @@ i ">>). scan_(Bin) -> - bel_scan:get_tokens(bel_scan:bin(Bin, #{engines => [?MODULE]})). + bel_scan:get_tokens(bel_scan:bin(Bin, #{ + engines => [ + ?MODULE, + {bel_scan_eng_html5, #{ + attr_engines => [ + bel_scan_eng_eel_attr, + bel_scan_eng_html5_attr + ] + }} + ] + })). 
scan_test() -> - [ { "Should scan single line" + [ { "Should scan attributes" , ?assertEqual([ - {token,text, + {token,html_tag, {anno,string, {loc,0,1,1,1,1}, - {loc,2,1,3,1,1}, - <<"a ">>}, - undefined}, - {token,'<%=', - {anno,string, - {loc,2,1,3,1,1}, - {loc,5,1,6,1,1}, - <<"<%=">>}, - undefined}, - {token,text, - {anno,string, - {loc,5,1,6,1,1}, - {loc,8,1,9,1,1}, - <<" b ">>}, - undefined}, - {token,'.%>', - {anno,string, - {loc,8,1,9,1,1}, - {loc,11,1,12,1,1}, - <<".%>">>}, - undefined}, - {token,text, - {anno,string, - {loc,11,1,12,1,1}, - {loc,14,1,15,1,1}, - <<" c ">>}, - undefined}, - {token,'<%=', - {anno,string, - {loc,14,1,15,1,1}, - {loc,17,1,18,1,1}, - <<"<%=">>}, - undefined}, - {token,text, - {anno,string, - {loc,17,1,18,1,1}, - {loc,20,1,21,1,1}, - <<" d ">>}, - undefined}, - {token,'%>', - {anno,string, - {loc,20,1,21,1,1}, - {loc,22,1,23,1,1}, - <<"%>">>}, - undefined}, - {token,text, - {anno,string, - {loc,22,1,23,1,1}, - {loc,25,1,26,1,1}, - <<" e ">>}, - undefined}, - {token,'<%', - {anno,string, - {loc,25,1,26,1,1}, - {loc,27,1,28,1,1}, - <<"<%">>}, - undefined}, - {token,text, - {anno,string, - {loc,27,1,28,1,1}, - {loc,30,1,31,1,1}, - <<" f ">>}, - undefined}, - {token,'%>', - {anno,string, - {loc,30,1,31,1,1}, - {loc,32,1,33,1,1}, - <<"%>">>}, - undefined}, - {token,text, - {anno,string, - {loc,32,1,33,1,1}, {loc,35,1,36,1,1}, - <<" g ">>}, - undefined}, - {token,'<%', + <<"<div :if={@bool} id={@id}>foo</div>">>}, + {<<"div">>, + {[{token,eel_directive, + {anno,string, + {loc,5,1,6,1,1}, + {loc,16,1,17,1,1}, + <<":if={@bool}">>}, + {<<"if">>, + [{token,eel_expr_inline, + {anno,string, + {loc,10,1,11,1,1}, + {loc,15,1,16,1,1}, + <<":if={@bool}">>}, + <<"@bool">>,bel_scan_eng_eel}]}, + bel_scan_eng_eel_attr}, + {token,eel_attribute, + {anno,string, + {loc,17,1,18,1,1}, + {loc,25,1,26,1,1}, + <<"id={@id}">>}, + {<<"id">>, + [{token,eel_expr_inline, + {anno,string, + {loc,21,1,22,1,1}, + {loc,24,1,25,1,1}, + <<"id={@id}">>}, + <<"@id">>,bel_scan_eng_eel}]}, + bel_scan_eng_eel_attr}], + [{token,text, + {anno,string, + {loc,26,1,27,1,1}, + {loc,29,1,30,1,1}, + <<"foo">>}, + undefined,bel_scan}]}}, + bel_scan_eng_html5}, + {token,html_tag, {anno,string, {loc,35,1,36,1,1}, - {loc,37,1,38,1,1}, - <<"<%">>}, - undefined}, - {token,text, - {anno,string, - {loc,37,1,38,1,1}, - {loc,40,1,41,1,1}, - <<" h ">>}, - undefined}, - {token,'.%>', - {anno,string, - {loc,40,1,41,1,1}, - {loc,43,1,44,1,1}, - <<".%>">>}, - undefined}, - {token,text, - {anno,string, - {loc,43,1,44,1,1}, - {loc,45,1,46,1,1}, - <<" i">>}, - undefined} + {loc,63,1,64,1,1}, + <<"<span class='foo'>bar</span>">>}, + {<<"span">>, + {[{token,html5_attribute, + {anno,string, + {loc,41,1,42,1,1}, + {loc,52,1,53,1,1}, + <<"class='foo'">>}, + {<<"class">>,<<"foo">>}, + bel_scan_eng_html5_attr}], + [{token,text, + {anno,string, + {loc,53,1,54,1,1}, + {loc,56,1,57,1,1}, + <<"bar">>}, + undefined,bel_scan}]}}, + bel_scan_eng_html5} + ], scan_(<<"<div :if={@bool} id={@id}>foo</div><span class='foo'>bar</span>">>))} + , { "Should scan single line" + , ?assertEqual([ + {token,text, + {anno,string, + {loc,0,1,1,1,1}, + {loc,2,1,3,1,1}, + <<"a ">>}, + undefined,bel_scan}, + {token,eel_expr_inline, + {anno,string, + {loc,2,1,3,1,1}, + {loc,11,1,12,1,1}, + <<"<%= b .%>">>}, + <<"b">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,11,1,12,1,1}, + {loc,14,1,15,1,1}, + <<" c ">>}, + undefined,bel_scan}, + {token,eel_expr_start, + {anno,string, + {loc,14,1,15,1,1}, + {loc,22,1,23,1,1}, + <<"<%= d %>">>}, + <<"d">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,22,1,23,1,1}, + {loc,25,1,26,1,1}, + <<" e ">>}, + undefined,bel_scan}, + {token,eel_expr_continue, + {anno,string, + {loc,25,1,26,1,1}, + {loc,32,1,33,1,1}, + <<"<% f %>">>}, + <<"f">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,32,1,33,1,1}, + {loc,35,1,36,1,1}, + <<" g ">>}, + undefined,bel_scan}, + {token,eel_expr_end, + {anno,string, + {loc,35,1,36,1,1}, + {loc,43,1,44,1,1}, + <<"<% h .%>">>}, + <<"h">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,43,1,44,1,1}, + {loc,45,1,46,1,1}, + <<" i">>}, + undefined,bel_scan} ], scan_(?SLINE))} , { "Should scan multiple lines" , ?assertEqual([ - {token,text, - {anno,string, - {loc,0,1,1,1,1}, - {loc,2,1,3,1,1}, - <<"a ">>}, - undefined}, - {token,'<%=', - {anno,string, - {loc,2,1,3,1,1}, - {loc,5,1,6,1,1}, - <<"<%=">>}, - undefined}, - {token,text, - {anno,string, - {loc,5,1,6,1,1}, - {loc,8,2,1,1,1}, - <<" b\n">>}, - undefined}, - {token,'.%>', - {anno,string, - {loc,8,2,1,1,1}, - {loc,11,2,4,1,1}, - <<".%>">>}, - undefined}, - {token,text, - {anno,string, - {loc,11,2,4,1,1}, - {loc,14,2,7,1,1}, - <<" c ">>}, - undefined}, - {token,'<%=', - {anno,string, - {loc,14,2,7,1,1}, - {loc,17,2,10,1,1}, - <<"<%=">>}, - undefined}, - {token,text, - {anno,string, - {loc,17,2,10,1,1}, - {loc,20,3,3,1,1}, - <<"\nd ">>}, - undefined}, - {token,'%>', - {anno,string, - {loc,20,3,3,1,1}, - {loc,22,3,5,1,1}, - <<"%>">>}, - undefined}, - 
{token,text, - {anno,string, - {loc,22,3,5,1,1}, - {loc,25,4,1,1,1}, - <<" e\n">>}, - undefined}, - {token,'<%', - {anno,string, - {loc,25,4,1,1,1}, - {loc,27,4,3,1,1}, - <<"<%">>}, - undefined}, - {token,text, - {anno,string, - {loc,27,4,3,1,1}, - {loc,31,6,1,1,1}, - <<" f\n\n">>}, - undefined}, - {token,'%>', - {anno,string, - {loc,31,6,1,1,1}, - {loc,33,6,3,1,1}, - <<"%>">>}, - undefined}, - {token,text, - {anno,string, - {loc,33,6,3,1,1}, - {loc,41,10,1,1,1}, - <<"\n\n g\n\n">>}, - undefined}, - {token,'<%', - {anno,string, - {loc,41,10,1,1,1}, - {loc,43,10,3,1,1}, - <<"<%">>}, - undefined}, - {token,text, - {anno,string, - {loc,43,10,3,1,1}, - {loc,57,14,10,1,1}, - <<"\n\nh\n\n ">>}, - undefined}, - {token,'.%>', - {anno,string, - {loc,57,14,10,1,1}, - {loc,60,14,13,1,1}, - <<".%>">>}, - undefined}, - {token,text, - {anno,string, - {loc,60,14,13,1,1}, - {loc,65,18,1,1,1}, - <<"\n\ni\n\n">>}, - undefined} + {token,text, + {anno,string, + {loc,0,1,1,1,1}, + {loc,2,1,3,1,1}, + <<"a ">>}, + undefined,bel_scan}, + {token,eel_expr_inline, + {anno,string, + {loc,2,1,3,1,1}, + {loc,11,2,4,1,1}, + <<"<%= b\n.%>">>}, + <<"b">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,11,2,4,1,1}, + {loc,14,2,7,1,1}, + <<" c ">>}, + undefined,bel_scan}, + {token,eel_expr_start, + {anno,string, + {loc,14,2,7,1,1}, + {loc,22,3,5,1,1}, + <<"<%=\nd %>">>}, + <<"d">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,22,3,5,1,1}, + {loc,25,4,1,1,1}, + <<" e\n">>}, + undefined,bel_scan}, + {token,eel_expr_continue, + {anno,string, + {loc,25,4,1,1,1}, + {loc,33,6,3,1,1}, + <<"<% f\n\n%>">>}, + <<"f">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,33,6,3,1,1}, + {loc,41,10,1,1,1}, + <<"\n\n g\n\n">>}, + undefined,bel_scan}, + {token,eel_expr_end, + {anno,string, + {loc,41,10,1,1,1}, + {loc,60,14,13,1,1}, + <<"<%\n\nh\n\n .%>">>}, + <<"h">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,60,14,13,1,1}, + {loc,65,18,1,1,1}, + <<"\n\ni\n\n">>}, + 
undefined,bel_scan} ], scan_(?MLINE))} ]. diff --git a/src/bel_scan_eng_eel_attr.erl b/src/bel_scan_eng_eel_attr.erl new file mode 100644 index 0000000..56f249e --- /dev/null +++ b/src/bel_scan_eng_eel_attr.erl @@ -0,0 +1,106 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc EEl attributes engine module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_eng_eel_attr). +-behaviour(bel_scan_eng). + +% bel_scan_eng callback functions +-export([ init/1 + , handle_start/2 + , handle_text/2 + , handle_match/2 + , handle_terminate/2 + ]). + +-include("bel_scan_eng.hrl"). + +-define(DIRECTIVES, [<<"if">>, <<"let">>, <<"for">>]). + +%%%===================================================================== +%%% bel_scan_eng callback functions +%%%===================================================================== + +init(_Opts) -> + #engine{ + markers = [ + #marker{ + id = eel_attribute, + re = "(?:(?!:))(\\w+)=\\{(.*?[^\\\\}])\\}" + }, + #marker{ + id = eel_directive, + re = [":(", lists:join("|", ?DIRECTIVES), ")=\\{(.*?[^\\\\}])\\}"] + } + ] + }. + +handle_start(_Bin, Scan) -> + {noreply, Scan}. + +handle_text(_Text, Scan) -> + {halt, Scan}. 
+ +handle_match({?MODULE, eel_attribute, [K, Expr], Anno}, Scan) -> + {reply, [attribute_token(Anno, {K, Expr})], Scan}; +handle_match({?MODULE, eel_directive, [K, Expr], Anno}, Scan) -> + case lists:member(K, ?DIRECTIVES) of + true -> + {reply, [directive_token(Anno, {K, Expr})], Scan}; + false -> + error({invalid_directive, K}) + end; +handle_match({Mod, _, _, _}, Scan) when Mod =/= ?MODULE -> + {noreply, Scan}. + +handle_terminate(_Tokens, Scan) -> + {noreply, Scan}. + +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +attribute_token(Anno, {K, Expr}) -> + InitLoc = bel_scan_anno:get_loc(Anno), + Loc = bel_scan_loc:read(<<K/binary, "={">>, InitLoc), + token(eel_attribute, Loc, Anno, {K, Expr}). + +directive_token(Anno, {K, Expr}) -> + InitLoc = bel_scan_anno:get_loc(Anno), + Loc = bel_scan_loc:read(<<$:, K/binary, "={">>, InitLoc), + token(eel_directive, Loc, Anno, {K, Expr}). + +token(Id, Loc, Anno, {K, Expr}) -> + bel_scan_token:new(#{ + id => Id, + anno => Anno, + metadata => {K, expr_tokens(Loc, Anno, Expr)}, + engine => ?MODULE + }). + +expr_tokens(Loc, Anno0, Expr) -> + Anno1 = bel_scan_anno:set_loc(Loc, Anno0), + EndLoc = bel_scan_loc:read(Expr, Loc), + Anno = bel_scan_anno:set_end_loc(EndLoc, Anno1), + [bel_scan_eng_eel:expr_token( + bel_scan_eng_eel:expr_inline_id(), + Anno, + Expr + )]. diff --git a/src/bel_scan_eng_html5.erl b/src/bel_scan_eng_html5.erl index e9a0547..d5f3471 100644 --- a/src/bel_scan_eng_html5.erl +++ b/src/bel_scan_eng_html5.erl @@ -61,7 +61,7 @@ -define(ATTRS_ENGINES, [bel_scan_eng_html5_attr]). --record(state, { attrs_engines :: [module()] }). +-record(state, { attr_engines :: [module()] }). 
%%%===================================================================== %%% bel_scan_eng callback functions @@ -69,6 +69,9 @@ init(Opts) -> #engine{ + % TODO: Use "simple" markers and use a parser (yecc) to + % spot issues, like missing ending marker, e.g.: + % > "
" <- Missing "
" markers = [ #marker{ id = doctype, @@ -95,11 +98,15 @@ init(Opts) -> ?CHILD_NODES ?CLOSING_TAG >> + }, + #marker{ + id = comment, + re = "