diff --git a/include/bel_scan_eng.hrl b/include/bel_scan_eng.hrl new file mode 100644 index 0000000..857d19b --- /dev/null +++ b/include/bel_scan_eng.hrl @@ -0,0 +1,13 @@ +%% This header exists so that modules that behave as +%% bel_scan_eng can pattern match on these records. + +-record(marker, { + id :: bel_scan_mark:id(), + re :: bel_scan_mark:re() +}). + +-record(engine, { + module :: module(), + markers :: [bel_scan_mark:t()], + state :: bel_scan_eng:state() +}). diff --git a/rebar.config b/rebar.config index 166afdf..b4ccd1f 100644 --- a/rebar.config +++ b/rebar.config @@ -1,4 +1,4 @@ -{erl_opts, [debug_info, warnings_as_errors]}. +{erl_opts, [debug_info]}. {deps, []}. @@ -18,6 +18,9 @@ ]}. {profiles, [ + {prod, [ + {erl_opts, [no_debug_info, warnings_as_errors]} + ]}, {test, [ {erl_opts, [{extra_src_dirs, ["test/support"]}]} ]} diff --git a/src/bel_scan.erl b/src/bel_scan.erl index 2b92fcb..134a7b2 100644 --- a/src/bel_scan.erl +++ b/src/bel_scan.erl @@ -22,508 +22,333 @@ -module(bel_scan). -compile(inline_list_funcs). -% API +% API functions -export([ new/1 - , string/2 - , continue/2 - , skip_new_lns/2 - , terminate/1 - , new_ln/1 - , incr_col/1 - , incr_col/2 - , snapshot/1 - , update_pos/1 - , pos_text/1 - , anno/1 - , anno/2 - , anno/3 - , token/2 - , token/3 + , bin/2 + , state/1 + , state/2 + , fold/2 + , init_engines/1 + , lookup_engine/2 + , text_token/2 + , text_token/3 , push_token/2 , push_tokens/2 - , fold/2 + , yecc_tokens/1 ]). 
-% State get/set --export([ get/2 - , set/3 - , get_input/1 - , set_input/2 - , get_handler/1 - , set_handler/2 - , get_handler_state/1 - , set_handler_state/2 - , get_tokens/1 - , set_tokens/2 - , get_ln/1 - , set_ln/2 - , get_col/1 - , set_col/2 +% State getters and setters functions +-export([ get_src/1 + , set_src/2 + , get_engines/1 + , set_engines/2 + , get_bpart/1 + , set_bpart/2 , get_loc/1 , set_loc/2 - , get_snap_loc/1 - , set_snap_loc/2 - , get_buffer_pos/1 - , set_buffer_pos/2 - , get_pos/1 - , set_pos/2 - , get_len/1 - , set_len/2 - , get_source/1 - , set_source/2 + , get_prev_loc/1 + , set_prev_loc/2 + , get_tokens/1 + , set_tokens/2 ]). -export_type([ t/0 - , input/0 - , rest/0 - , handler/0 - , handler_opts/0 - , handler_state/0 - , tag/0 - , metadata/0 - , anno/0 - , value/0 + , src/0 + , engine/0 + , bpart/0 + , loc/0 + , pos/0 , token/0 - , line/0 - , column/0 - , location/0 - , position/0 - , length/0 - , result/0 ]). -% Callbacks - --callback init(handler_opts()) -> {ok, handler_state()}. - --callback handle_char(char(), rest(), t()) -> t(). - --callback handle_tokens([token()], t()) -> result(). - -% Libs - --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). --endif. - -% Macros - --define(is_ln(X), ( - is_integer(X) andalso X >= 1 -)). - --define(is_col(X), ( - is_integer(X) andalso X >= 1 -)). - --define(is_loc(X), ( - is_tuple(X) - andalso tuple_size(X) =:= 2 - andalso ?is_ln(element(1, X)) - andalso ?is_col(element(2, X)) -)). - --define(is_position(X), ( - is_integer(X) andalso X >= 0 -)). - --define(is_length(X), ( - is_integer(X) andalso X >= 0 -)). - --define(is_filename(X), ( - is_list(X) orelse is_binary(X) -)). - --define(is_source(X), ( - X =:= undefined - orelse ( - is_tuple(X) - andalso tuple_size(X) =:= 2 - andalso ( - ( - element(1, X) =:= file - andalso ?is_filename(element(2, X)) - ) - orelse ( - element(1, X) =:= module - andalso is_atom(element(2, X)) - ) - ) - ) -)). 
- --define(valid_params(Input, Handler), ( - is_binary(Input) andalso is_atom(Handler) -)). - --define(is_anno(X), ( - is_tuple(X) - andalso tuple_size(X) =:= 3 - andalso ?is_loc(element(1, X)) - andalso ( - element(2, X) =:= undefined - orelse ?is_filename(element(2, X)) - ) -)). - --define(is_token(X), ( - is_tuple(X) - andalso tuple_size(X) =:= 3 - andalso ?is_anno(element(2, X)) -)). - --define(DEFAULTS, #{ - handler_state => undefined, - tokens => [], - ln => 1, - col => 1, - snap_loc => {1, 1}, - buffer_pos => 0, - pos => 0, - len => 0, - source => undefined -}). - -% Types - --record(state, { input :: input() - , handler :: handler() - , handler_state :: handler_state() - , tokens :: [token()] - , ln :: line() - , col :: column() - , snap_loc :: location() - , buffer_pos :: position() - , pos :: position() - , len :: length() - , source :: source() - }). +-import(bel_scan_loc, [ new_ln/1, incr_col/2 ]). +-import(bel_scan_bpart, [ incr_len/2, get_part/1 ]). --opaque t() :: #state{}. --type input() :: binary(). --type rest() :: bitstring(). --type handler() :: module(). --type handler_opts() :: term(). --type handler_state() :: term(). --type line() :: pos_integer(). --type column() :: pos_integer(). --type location() :: {line(), column()}. --type position() :: non_neg_integer(). --type length() :: non_neg_integer(). --type tag() :: term(). --type metadata() :: term(). --type filename() :: file:filename_all() | undefined. --type anno() :: {location(), source(), metadata()}. --type value() :: term(). --type token() :: {tag(), anno(), value()}. --type source() :: {file, filename()} - | {module, module()} - | undefined - . --type result() :: term(). +-include("bel_scan_eng.hrl"). -%%%===================================================================== -%%% API -%%%===================================================================== - -% Fixes no return warning because of the false positive of the #state{}. --dialyzer({nowarn_function, [new/1]}). 
+-define(DEFAULT_OPTS, #{}). +-define(DEFAULT_META, undefined). -new(#{input := I, handler := H} = Params) when ?valid_params(I, H) -> - maps:fold(fun set/3, #state{}, maps:merge(?DEFAULTS, Params)). - -string(Opts, #state{} = State) -> - Handler = State#state.handler, - {ok, HandlerState} = Handler:init(Opts), - continue(State#state.input, State#state{handler_state = HandlerState}). +-record(state, { src :: src() + , engines :: [{module(), engine()}] + , bpart :: bpart() + , loc :: loc() + , prev_loc :: loc() + , tokens :: [token()] + , init_pos :: pos() + }). -continue(<>, State0) -> - case skip_new_lns(Rest0, State0) of - {ok, {Char, Rest, #state{handler = Handler} = State}} -> - Handler:handle_char(Char, Rest, State); - {eof, State} -> - terminate(State) - end; -continue(<<>>, #state{} = State) -> - terminate(State). - -skip_new_lns(<<$\r, $\n, Rest/bitstring>>, State) -> - skip_new_lns(Rest, new_ln(incr_col(State))); -skip_new_lns(<<$\r, Rest/bitstring>>, State) -> - skip_new_lns(Rest, new_ln(incr_col(State))); -skip_new_lns(<<$\n, Rest/bitstring>>, State) -> - skip_new_lns(Rest, new_ln(incr_col(State))); -skip_new_lns(<<$\f, Rest/bitstring>>, State) -> - skip_new_lns(Rest, new_ln(incr_col(State))); -skip_new_lns(<>, State) -> - {ok, {Char, Rest, State}}; -skip_new_lns(<<>>, State) -> - {eof, State}. - -terminate(#state{handler = Handler} = State) -> - Handler:handle_tokens(State#state.tokens, State). - -new_ln(#state{} = State) -> - State#state{ - ln = State#state.ln+1, - col = 1 - }. +-opaque t() :: #state{}. +-type src() :: bel_scan_anno:src(). +-type engine() :: bel_scan_eng:t(). +-type bpart() :: bel_scan_bpart:t(). +-type loc() :: bel_scan_loc:t(). +-type pos() :: bel_scan_loc:pos(). +-type token() :: bel_scan_token:t(). -incr_col(#state{} = State) -> - incr_col(1, State). 
+%%%===================================================================== +%%% API functions +%%%===================================================================== -incr_col(N, #state{} = State) when ?is_col(N) -> - State#state{ - col = State#state.col + N, - buffer_pos = State#state.buffer_pos + N, - len = State#state.len + N +new(Params) when is_map(Params) -> + Loc = maps:get(loc, Params, bel_scan_loc:new(#{})), + #state{ + src = maps:get(src, Params, string), + engines = init_engines(maps:get(engines, Params)), + bpart = maps:get(bpart, Params, bel_scan_bpart:new(#{ + bin => maps:get(bin, Params, <<>>) + })), + loc = Loc, + prev_loc = maps:get(prev_loc, Params, Loc), + tokens = maps:get(tokens, Params, []), + init_pos = maps:get(init_pos, Params, bel_scan_loc:get_pos(Loc)) }. -snapshot(#state{} = State) -> - State#state{ - snap_loc = {State#state.ln, State#state.col} - }. +bin(Bin, Opts) when is_binary(Bin) -> + start(Bin, new(Opts)). -update_pos(#state{} = State) -> - State#state{ - pos = State#state.buffer_pos, - len = 0 - }. +state(#state{bpart = BPart} = State) -> + start(bel_scan_bpart:get_bin(BPart), State). -pos_text(#state{} = State) -> - binary_part(State#state.input, State#state.pos, State#state.len). +state(Bin, #state{bpart = BPart} = State) when is_binary(Bin) -> + start(Bin, State#state{ + bpart = bel_scan_bpart:set_bin(Bin, BPart) + }). -anno(State) -> - anno(undefined, State). +fold(#state{} = State, Funs) -> + lists:foldl(fun(F, S) -> F(S) end, State, Funs). -anno(Metadata, #state{} = State) -> - anno(get_snap_loc(State), get_source(State), Metadata). +init_engines(Modules) -> + [init_engine(Mod) || Mod <- Modules]. -anno(Location, Source, Metadata) when ?is_loc(Location), ?is_source(Source) -> - {Location, Source, Metadata}. +lookup_engine(Mod, #state{engines = Engines}) -> + proplists:lookup(Mod, Engines). -token(Tag, #state{} = State) -> - {Tag, anno(State), pos_text(State)}. 
+text_token(Text, State) -> + text_token(Text, ?DEFAULT_META, State). -token(Tag, Metadata, #state{} = State) -> - {Tag, anno(State), Metadata}; -token(Tag, Anno, Metadata) when ?is_anno(Anno) -> - {Tag, Anno, Metadata}. +text_token(Text, Metadata, #state{} = State) -> + bel_scan_token:new(#{ + id => text, + anno => bel_scan_anno:new(#{ + src => State#state.src, + loc => State#state.prev_loc, + end_loc => State#state.loc, + text => Text + }), + metadata => Metadata, + engine => ?MODULE + }). -push_token(Token, #state{} = State) when ?is_token(Token) -> - State#state{tokens = [Token | State#state.tokens]}. +push_token(Token, #state{tokens = Tokens} = State) -> + State#state{tokens = Tokens ++ [Token]}. -push_tokens(Tokens, #state{} = State) when is_list(Tokens) -> +push_tokens(Tokens, State) when is_list(Tokens) -> lists:foldl(fun push_token/2, State, Tokens). -fold(#state{} = State, Funs) when is_list(Funs) -> - lists:foldl(fun(Fun, Acc) when is_function(Fun, 1) -> - Fun(Acc) - end, State, Funs). +yecc_tokens(#state{tokens = Tokens}) -> + [bel_scan_token:to_yecc(Token) || Token <- Tokens]. %%%===================================================================== -%%% State get/set +%%% State getters and setters functions %%%===================================================================== -get(input, State) -> - get_input(State); -get(handler, State) -> - get_handler(State); -get(handler_state, State) -> - get_handler_state(State); -get(tokens, State) -> - get_tokens(State); -get(ln, State) -> - get_ln(State); -get(col, State) -> - get_col(State); -get(loc, State) -> - get_loc(State); -get(snap_loc, State) -> - get_snap_loc(State); -get(buffer_pos, State) -> - get_buffer_pos(State); -get(pos, State) -> - get_pos(State); -get(len, State) -> - get_len(State); -get(source, State) -> - get_source(State). 
- -set(input, Value, State) -> - set_input(Value, State); -set(handler, Value, State) -> - set_handler(Value, State); -set(handler_state, Value, State) -> - set_handler_state(Value, State); -set(tokens, Value, State) -> - set_tokens(Value, State); -set(ln, Value, State) -> - set_ln(Value, State); -set(col, Value, State) -> - set_col(Value, State); -set(loc, Value, State) -> - set_loc(Value, State); -set(snap_loc, Value, State) -> - set_snap_loc(Value, State); -set(buffer_pos, Value, State) -> - set_buffer_pos(Value, State); -set(pos, Value, State) -> - set_pos(Value, State); -set(len, Value, State) -> - set_len(Value, State); -set(source, Value, State) -> - set_source(Value, State). - -get_input(#state{input = Input}) -> - Input. - -set_input(Input, #state{} = State) when is_binary(Input) -> - State#state{input = Input}. - -get_handler(#state{handler = Handler}) -> - Handler. - -set_handler(Handler, #state{} = State) when is_atom(Handler) -> - State#state{handler = Handler}. - -get_handler_state(#state{handler_state = HandlerState}) -> - HandlerState. - -set_handler_state(HandlerState, #state{} = State) -> - State#state{handler_state = HandlerState}. +get_src(#state{src = Src}) -> + Src. -get_tokens(#state{tokens = Tokens}) -> - Tokens. - -set_tokens(Tokens, #state{} = State) when is_list(Tokens) -> - State#state{tokens = Tokens}. +set_src(Src, #state{} = State) -> + State#state{src = Src}. -get_ln(#state{ln = Ln}) -> - Ln. +get_engines(#state{engines = Engines}) -> + Engines. -set_ln(Ln, #state{} = State) when ?is_ln(Ln) -> - State#state{ln = Ln}. +set_engines(Engines, #state{} = State) -> + State#state{engines = Engines}. -get_col(#state{col = Col}) -> - Col. +get_bpart(#state{bpart = BPart}) -> + BPart. -set_col(Col, #state{} = State) when ?is_col(Col) -> - State#state{col = Col}. - -get_loc(#state{ln = Ln, col = Col}) -> - {Ln, Col}. - -set_loc({Ln, Col}, State) when ?is_ln(Ln), ?is_col(Col) -> - State#state{ - ln = Ln, - col = Col - }. 
+set_bpart(BPart, #state{} = State) -> + State#state{bpart = BPart}. -get_snap_loc(#state{snap_loc = Col}) -> - Col. +get_loc(#state{loc = Loc}) -> + Loc. -set_snap_loc({Ln, Col}, #state{} = State) when ?is_ln(Ln), ?is_col(Col) -> - State#state{snap_loc = {Ln, Col}}. +set_loc(Loc, #state{} = State) -> + State#state{loc = Loc}. -get_buffer_pos(#state{buffer_pos = BufferPos}) -> - BufferPos. +get_prev_loc(#state{prev_loc = PrevLoc}) -> + PrevLoc. -set_buffer_pos(BufferPos, #state{} = State) when ?is_position(BufferPos) -> - State#state{buffer_pos = BufferPos}. +set_prev_loc(PrevLoc, #state{} = State) -> + State#state{prev_loc = PrevLoc}. -get_pos(#state{pos = Pos}) -> - Pos. - -set_pos(Pos, #state{} = State) when ?is_position(Pos) -> - State#state{pos = Pos}. - -get_len(#state{len = Len}) -> - Len. - -set_len(Len, #state{} = State) when ?is_length(Len) -> - State#state{len = Len}. - -get_source(#state{source = Source}) -> - Source. +get_tokens(#state{tokens = Tokens}) -> + Tokens. -set_source(Source, #state{} = State) when ?is_source(Source) -> - State#state{source = Source}. +set_tokens(Tokens, #state{} = State) -> + State#state{tokens = Tokens}. %%%===================================================================== %%% Internal functions %%%===================================================================== -% nothing here yet! - -%%%===================================================================== -%%% Tests -%%% TODO: All kind of missing tests. -%%% TODO: Move tests to "../test/bel_scan_SUITE.erl". -%%%===================================================================== - --ifdef(TEST). --compile([export_all, nowarn_export_all]). - -% Callbacks - -init([]) -> - {ok, []}. +init_engine(Mod) when is_atom(Mod) -> + init_engine({Mod, ?DEFAULT_OPTS}); +init_engine({Mod, Opts}) when is_atom(Mod), is_map(Opts) -> + {Mod, bel_scan_eng:compile(Mod:init(Opts))}; +init_engine({Mod, #engine{} = Eng}) when is_atom(Mod) -> + {Mod, Eng}. 
+ +start(Bin0, State0) -> + State = handle_start(Bin0, State0), + Bin = bel_scan_bpart:get_bin(State#state.bpart), + continue(find_start_markers, Bin, State). + +continue(scan, <<>>, State) -> + terminate(State); +continue(find_start_markers, <<>>, State) -> + terminate(State); +continue(scan, <>, State) -> + case bel_scan_read:bin(Rest0) of + {{new_ln, Incr}, Rest} -> + continue(find_start_markers, Rest, fold(State, [ + fun(S) -> S#state{loc = new_ln(S#state.loc)} end, + fun(S) -> S#state{bpart = incr_len(Incr, S#state.bpart)} end + ])); + {{continue, Incr}, Rest} -> + continue(find_start_markers, Rest, fold(State, [ + fun(S) -> S#state{loc = incr_col(Incr, S#state.loc)} end, + fun(S) -> S#state{bpart = incr_len(Incr, S#state.bpart)} end + ])); + terminate -> + terminate(State) + end; +continue(find_start_markers, <>, State0) -> + case find_marker(State0#state.engines, Rest0) of + {match, {Mod, MarkerId, MatchText, Captured, Rest}} -> + State1 = handle_text(State0), + Loc = State1#state.loc, + EndLoc = bel_scan_loc:read(MatchText, State1#state.loc), + Anno = bel_scan_anno:new(#{ + src => State1#state.src, + loc => Loc, + end_loc => EndLoc, + text => MatchText + }), + Match = {Mod, MarkerId, Captured, Anno}, + Pos = bel_scan_loc:get_pos(EndLoc), + BPart = reset_bpart_pos(Pos, State1), + continue({handle_match, Match}, Rest, State1#state{ + loc = EndLoc, + prev_loc = EndLoc, + bpart = BPart + }); + nomatch -> + continue(scan, Rest0, State0) + end; +continue({handle_match, Match}, Rest, State0) -> + State = handle_match(Match, State0), + continue(find_start_markers, Rest, State). + +terminate(State0) -> + State = handle_text(State0), + handle_terminate(State). 
+ +find_marker([{Mod, Eng} | Engs], Bin) -> + Markers = bel_scan_eng:get_markers(Eng), + case do_find_marker(Markers, Bin) of + {match, {Marker, MatchText, Captured, Rest}} -> + MarkerId = bel_scan_mark:get_id(Marker), + {match, {Mod, MarkerId, MatchText, Captured, Rest}}; + nomatch -> + find_marker(Engs, Bin) + end; +find_marker([], _) -> + nomatch. + +do_find_marker([Marker | Markers], Bin) -> + case bel_scan_mark:re_match(Marker, Bin) of + {match, {MatchText, Captured, Rest}} -> + {match, {Marker, MatchText, Captured, Rest}}; + nomatch -> + do_find_marker(Markers, Bin) + end; +do_find_marker([], _) -> + nomatch. + +handle_start(Bin, State) -> + do_handle_start(State#state.engines, Bin, State). + +do_handle_start([{Mod, _Eng} | Engs], Bin0, State0) -> + case Mod:handle_start(Bin0, State0) of + {noreply, State} -> + do_handle_start(Engs, Bin0, State); + {reply, Bin, State} -> + do_handle_start(Engs, Bin, State); + {halt, State} -> + State + end; +do_handle_start([], Bin, State) -> + BPart = bel_scan_bpart:set_bin(Bin, State#state.bpart), + State#state{bpart = BPart}. + +handle_text(State) -> + handle_text(get_part(State#state.bpart), State). + +handle_text(<<>>, State) -> + State; +handle_text(Text, State) -> + do_handle_text(State#state.engines, Text, State). + +do_handle_text([{Mod, _Eng} | Engs], Text0, State0) -> + case Mod:handle_text(Text0, State0) of + {noreply, State} -> + do_handle_text(Engs, Text0, State); + {reply, Text, State} -> + do_handle_text(Engs, Text, State); + {halt, State} -> + State + end; +do_handle_text([], Text, State) -> + fold(State, [ + fun(S) -> push_token(text_token(Text, S), S) end, + fun(S) -> clear_text(S) end + ]). + +handle_match(Match, State) -> + do_handle_match(State#state.engines, Match, State). 
+ +do_handle_match([{Mod, _Eng} | Engs], Match, State0) -> + case Mod:handle_match(Match, State0) of + {noreply, State} -> + do_handle_match(Engs, Match, State); + {reply, Tokens, State} -> + do_handle_match(Engs, Match, push_tokens(Tokens, State)); + {halt, State} -> + State + end; +do_handle_match([], _Match, State) -> + State. -handle_char(_Char, Rest, State) -> - continue(Rest, snapshot(incr_col(State))). +handle_terminate(State) -> + do_handle_terminate(State#state.engines, State#state.tokens, State). + +do_handle_terminate([{Mod, _Eng} | Engs], Tokens0, State0) -> + case Mod:handle_terminate(Tokens0, State0) of + {noreply, State} -> + do_handle_terminate(Engs, Tokens0, State); + {reply, Tokens, State} -> + do_handle_terminate(Engs, Tokens, State); + {halt, State} -> + State + end; +do_handle_terminate([], Tokens, State) -> + State#state{tokens = Tokens}. -handle_tokens(_Tokens, State) -> - State. +clear_text(#state{loc = Loc} = State) -> + Pos = bel_scan_loc:get_pos(Loc), + State#state{bpart = reset_bpart_pos(Pos, State)}. -% Support - -params(Input) -> - #{input => Input, handler => ?MODULE}. - -% Runners - -new_test() -> - [ { "Should raise 'function_clause' when wrong params" - , ?assertError(function_clause, new(#{}))} - , { "Should return a valid state" - , ?assertEqual(#state{ - input = <<>>, - handler = ?MODULE, - handler_state = maps:get(handler_state, ?DEFAULTS), - tokens = maps:get(tokens, ?DEFAULTS), - ln = maps:get(ln, ?DEFAULTS), - col = maps:get(col, ?DEFAULTS), - snap_loc = maps:get(snap_loc, ?DEFAULTS), - buffer_pos = maps:get(buffer_pos, ?DEFAULTS), - pos = maps:get(pos, ?DEFAULTS), - len = maps:get(len, ?DEFAULTS), - source = maps:get(source, ?DEFAULTS) - }, new(params(<<>>)))} - ]. 
- -string_test() -> - Input = <<"foo\nbar">>, - State = string([], new(params(Input))), - [ { "Should scan and return the tokens" - , ?assertEqual([], get_tokens(State))} - , { "Should return correct ln" - , ?assertEqual(2, get_ln(State))} - , { "Should return correct col" - , ?assertEqual(4, get_col(State))} - , { "Should return correct loc" - , ?assertEqual({2, 4}, get_loc(State))} - , { "Should return correct snap_loc" - , ?assertEqual({2, 4}, get_snap_loc(State))} - , { "Should return correct buffer_pos" - , ?assertEqual(7, get_buffer_pos(State))} - , { "Should return correct pos" - , ?assertEqual(0, get_pos(State))} - , { "Should return correct len" - , ?assertEqual(7, get_len(State))} - , { "Should return correct pos_text" - , ?assertEqual(Input, pos_text(State))} - ]. - --endif. +reset_bpart_pos(Pos, #state{init_pos = InitPos, bpart = BPart}) -> + bel_scan_bpart:reset_pos(Pos - InitPos, BPart). diff --git a/src/bel_scan_anno.erl b/src/bel_scan_anno.erl new file mode 100644 index 0000000..7c216bd --- /dev/null +++ b/src/bel_scan_anno.erl @@ -0,0 +1,107 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc Annotation module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_anno). 
+ +% API functions +-export([ new/1, to_yecc/1 ]). + +% State getters and setters functions +-export([ get_src/1 + , set_src/2 + , get_loc/1 + , set_loc/2 + , get_end_loc/1 + , set_end_loc/2 + , get_text/1 + , set_text/2 + ]). + +-export_type([ t/0, src/0, ln/0, col/0, loc/0, text/0 ]). + +-record(anno, { src :: src() + , loc :: loc() + , end_loc :: loc() + , text :: text() + }). + +-type t() :: #anno{}. +-type src() :: {file, file:filename_all()} + | {module, module()} + | string + . +-type ln() :: pos_integer(). +-type col() :: pos_integer(). +-type loc() :: bel_scan_loc:t(). +-type text() :: binary(). + +%%%===================================================================== +%%% API functions +%%%===================================================================== + +new(Params) when is_map(Params) -> + #anno{ + src = maps:get(src, Params), + loc = maps:get(loc, Params), + end_loc = maps:get(end_loc, Params), + text = maps:get(text, Params) + }. + +to_yecc(#anno{loc = Loc, text = Text, src = Src}) -> + Anno0 = erl_anno:new(bel_scan_loc:to_tuple(Loc)), + Anno = erl_anno:set_text(binary_to_list(Text), Anno0), + case Src of + {file, File} -> + erl_anno:set_file(File, Anno); + {module, Mod} -> + File = proplists:get_value(source, Mod:module_info(compile)), + erl_anno:set_file(File, Anno); + string -> + Anno + end. + +%%%===================================================================== +%%% State getters and setters functions +%%%===================================================================== + +get_src(#anno{src = Src}) -> + Src. + +set_src(Src, #anno{} = Anno) -> + Anno#anno{src = Src}. + +get_loc(#anno{loc = Loc}) -> + Loc. + +set_loc(Loc, #anno{} = Anno) -> + Anno#anno{loc = Loc}. + +get_end_loc(#anno{end_loc = EndLoc}) -> + EndLoc. + +set_end_loc(EndLoc, #anno{} = Anno) -> + Anno#anno{end_loc = EndLoc}. + +get_text(#anno{text = Text}) -> + Text. + +set_text(Text, #anno{} = Anno) -> + Anno#anno{text = Text}. 
diff --git a/src/bel_scan_bpart.erl b/src/bel_scan_bpart.erl new file mode 100644 index 0000000..6777a22 --- /dev/null +++ b/src/bel_scan_bpart.erl @@ -0,0 +1,169 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc Binary part module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_bpart). + +% API functions +-export([ new/1 + , reset_pos/2 + , incr_pos/2 + , incr_len/2 + , get_part/1 + ]). + +% State getters and setters functions +-export([ get_bin/1 + , set_bin/2 + , get_pos/1 + , set_pos/2 + , get_len/1 + , set_len/2 + , get_init_len/1 + , set_init_len/2 + ]). + +-export_type([ t/0, pos/0, len/0 ]). + +-define(FIRST_POS, 0). +-define(INIT_LEN, 0). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-record(bpart, { bin :: binary() + , pos :: pos() + , len :: len() + , init_len :: len() + }). +-type t() :: #bpart{}. +-type pos() :: non_neg_integer(). +-type len() :: non_neg_integer(). 
+ +%%%===================================================================== +%%% API functions +%%%===================================================================== + +new(Params) when is_map(Params) -> + InitLen = maps:get(init_len, Params, ?INIT_LEN), + #bpart{ + bin = maps:get(bin, Params), + pos = maps:get(pos, Params, ?FIRST_POS), + len = maps:get(len, Params, InitLen), + init_len = InitLen + }. + +reset_pos(Pos, #bpart{init_len = InitLen} = BPart) -> + BPart#bpart{ + pos = Pos, + len = InitLen + }. + +incr_pos(N, #bpart{pos = Pos} = BPart) -> + BPart#bpart{pos = Pos+N}. + +incr_len(N, #bpart{len = Len} = BPart) -> + BPart#bpart{len = Len+N}. + +get_part(#bpart{bin = Bin} = BPart) -> + binary:part(Bin, BPart#bpart.pos, BPart#bpart.len). + +%%%===================================================================== +%%% State getters and setters functions +%%%===================================================================== + +get_bin(#bpart{bin = Bin}) -> + Bin. + +set_bin(Bin, #bpart{} = BPart) -> + BPart#bpart{bin = Bin}. + +get_pos(#bpart{pos = Pos}) -> + Pos. + +set_pos(Pos, #bpart{} = BPart) -> + BPart#bpart{pos = Pos}. + +get_len(#bpart{len = Len}) -> + Len. + +set_len(Len, #bpart{} = BPart) -> + BPart#bpart{len = Len}. + +get_init_len(#bpart{init_len = InitLen}) -> + InitLen. + +set_init_len(InitLen, #bpart{} = BPart) -> + BPart#bpart{init_len = InitLen}. + +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +% nothing here yet! + +%%%===================================================================== +%%% Tests +%%%===================================================================== + +-ifdef(TEST). +-compile([export_all, nowarn_export_all]). 
+ +new_test() -> + [ { "Should have default values" + , ?assertEqual(#bpart{ + bin = <<>>, + pos = ?FIRST_POS, + len = ?INIT_LEN, + init_len = ?INIT_LEN + }, new(#{bin => <<>>})) + } + , { "Should have params values" + , ?assertEqual(#bpart{ + bin = <<>>, + pos = 6, + len = 6, + init_len = 6 + }, new(#{ + bin => <<>>, + pos => 6, + len => 6, + init_len => 6 + })) + } + ]. + +cursor_test() -> + BPart = new(#{bin => <<>>}), + [ { "Should increment one position" + , ?assert((incr_pos(1, BPart))#bpart.pos =:= ?FIRST_POS+1)} + , { "Should increment one length" + , ?assert((incr_len(1, BPart))#bpart.len =:= ?INIT_LEN+1)} + ]. + +get_part_test() -> + ?assertEqual(<<"bpart">>, get_part(new(#{ + bin => <<" bpart ">>, + pos => 2, + len => 5 + }))). + +-endif. diff --git a/src/bel_scan_eng.erl b/src/bel_scan_eng.erl new file mode 100644 index 0000000..9e48091 --- /dev/null +++ b/src/bel_scan_eng.erl @@ -0,0 +1,141 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc Engine behaviour module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_eng). + +% API functions +-export([ compile/1, fold/2 ]). 
+ +% State getters and setters functions +-export([ get_module/1 + , set_module/2 + , get_markers/1 + , set_markers/2 + , get_state/1 + , set_state/2 + ]). + +-export_type([ t/0 + , scan/0 + , marker_id/0 + , token/0 + , loc/0 + , opts/0 + , state/0 + , re_group/0 + , captured/0 + ]). + +-include("bel_scan_eng.hrl"). + +-type t() :: #engine{}. +-type scan() :: bel_scan:t(). +-type marker_id() :: bel_scan_mark:id(). +-type token() :: bel_scan_token:t(). +-type loc() :: bel_scan_loc:t(). +-type opts() :: term(). +-type state() :: term(). +-type re_group() :: binary(). +-type captured() :: [re_group()]. +-type anno() :: bel_scan_anno:t(). + +%%%===================================================================== +%%% Callbacks +%%%===================================================================== + +-callback init(Opts) -> Engine + when Opts :: opts() + , Engine :: t() + . + +-callback handle_start(Bin, Scan) -> Return + when Bin :: binary() + , Scan :: scan() + , Return :: {noreply, scan()} + | {reply, binary(), scan()} + | {halt, scan()} + . + +-callback handle_text(Text, Scan) -> Return + when Text :: binary() + , Scan :: scan() + , Return :: {noreply, scan()} + | {reply, binary(), scan()} + | {halt, scan()} + . + +-callback handle_match(Match, Scan) -> Return + when Match :: {MarkerMod, MarkerId, Captured, Anno} + , MarkerMod :: module() + , MarkerId :: marker_id() + , Captured :: captured() + , Anno :: anno() + , Scan :: scan() + , Return :: {noreply, scan()} + | {reply, [token()], scan()} + | {halt, scan()} + . + +-callback handle_terminate(Tokens, Scan0) -> Return + when Tokens :: [token()] + , Scan0 :: scan() + , Return :: {noreply, scan()} + | {reply, [token()], scan()} + | {halt, scan()} + . 
+ +%%%===================================================================== +%%% API functions +%%%===================================================================== + +compile(#engine{markers = Markers} = Eng) -> + Eng#engine{markers = [bel_scan_mark:compile(M) || M <- Markers]}. + +fold(#engine{} = Eng, Funs) when is_list(Funs) -> + lists:foldl(fun(F, E) -> F(E) end, Eng, Funs). + +%%%===================================================================== +%%% State getters and setters functions +%%%===================================================================== + +get_module(#engine{module = Module}) -> + Module. + +set_module(Module, #engine{} = Eng) when is_atom(Module) -> + Eng#engine{module = Module}. + +get_markers(#engine{markers = Markers}) -> + Markers. + +set_markers(Markers, #engine{} = Eng) -> + Eng#engine{markers = Markers}. + +get_state(#engine{state = State}) -> + State. + +set_state(State, #engine{} = Eng) -> + Eng#engine{state = State}. + +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +% nothing here yet! diff --git a/src/bel_scan_eng_eel.erl b/src/bel_scan_eng_eel.erl new file mode 100644 index 0000000..af32761 --- /dev/null +++ b/src/bel_scan_eng_eel.erl @@ -0,0 +1,386 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc EEl engine module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. 
+%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_eng_eel). +-behaviour(bel_scan_eng). + +% bel_scan_eng callback functions +-export([ init/1 + , handle_start/2 + , handle_text/2 + , handle_match/2 + , handle_terminate/2 + ]). + +% Support functions +-export([ expr_token/3 + , expr_inline_id/0 + , expr_start_id/0 + , expr_continue_id/0 + , expr_end_id/0 + , comment_id/0 + ]). + +-include("bel_scan_eng.hrl"). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-define(EXPR_INLINE_ID, eel_expr_inline). +-define(EXPR_START_ID, eel_expr_start). +-define(EXPR_CONTINUE_ID, eel_expr_continue). +-define(EXPR_END_ID, eel_expr_end). +-define(EXPR_COMMENT_ID, eel_comment). 
+
+%%%=====================================================================
+%%% bel_scan_eng callback functions
+%%%=====================================================================
+
+%% @doc Builds the EEl engine: one marker per expression flavour plus
+%% the comment marker. Options are ignored.
+%%
+%% Every marker captures the expression body in a single group. The body
+%% may not contain another "<%" and must be separated from the
+%% delimiters by whitespace.
+init(_Opts) ->
+    #engine{
+        markers = [
+            #marker{
+                id = expr_inline_id(),
+                % "\\." is a literal dot in the regex. A single backslash
+                % ("\.") is just "." in an Erlang string and would match
+                % any character before "%>".
+                re = "<%=\\s+((?:(?!<%).)*)\\s+\\.%>"
+            },
+            #marker{
+                id = expr_start_id(),
+                re = "<%=\\s+((?:(?!<%).)*)\\s+%>"
+            },
+            #marker{
+                id = expr_continue_id(),
+                re = "<%\\s+((?:(?!<%).)*)\\s+%>"
+            },
+            #marker{
+                id = expr_end_id(),
+                % Literal dot, see the inline marker above.
+                re = "<%\\s+((?:(?!<%).)*)\\s+\\.%>"
+            },
+            #marker{
+                id = comment_id(),
+                % "\\s" is the regex whitespace class. "\s" in an Erlang
+                % string is a plain space character, which would restrict
+                % the separator to spaces only.
+                re = "<%!--\\s+((?:(?!<%).)*)\\s+--%>"
+            }
+            % TODO: Use "simple" markers and use a parser (yecc) to
+            %       spot issues, like missing ending marker, e.g.:
+            %       > "<%= case Bool of true -> %>ok<% end %>" <- Missing ".%>"
+            % #marker{
+            %     id = '<%=',
+            %     re = "<%="
+            % },
+            % #marker{
+            %     id = '.%>',
+            %     re = "\\.\\s*%>"
+            % },
+            % #marker{
+            %     id = '<%',
+            %     re = "<%(?:(?!=))"
+            % },
+            % #marker{
+            %     id = '%>',
+            %     re = "(?:(?!\\.))%>"
+            % },
+            % #marker{
+            %     id = '<%!--',
+            %     re = "<%!--"
+            % },
+            % #marker{
+            %     id = '--%>',
+            %     re = "--%>"
+            % }
+        ]
+    }.
+
+%% @doc No-op: the scan state is returned untouched.
+handle_start(_Bin, Scan) ->
+    {noreply, Scan}.
+
+%% @doc No-op: the scan state is returned untouched.
+handle_text(_Text, Scan) ->
+    {noreply, Scan}.
+
+%% @doc Turns a match of one of this engine's markers into a token.
+%%
+%% Comment matches produce no token. Any other marker of this module
+%% must carry exactly one captured group (the expression) and replies
+%% with a single expression token. Matches owned by other engines are
+%% ignored.
+handle_match({?MODULE, ?EXPR_COMMENT_ID, _Captured, _Anno}, Scan) ->
+    % TODO: Maybe push a comment token.
+    {noreply, Scan};
+handle_match({?MODULE, MarkerId, [Expr], Anno}, Scan) ->
+    Token = expr_token(MarkerId, Anno, Expr),
+    {reply, [Token], Scan};
+handle_match({Mod, _, _, _}, Scan) when Mod =/= ?MODULE ->
+    {noreply, Scan}.
+
+%% @doc No-op: the scan state is returned untouched.
+handle_terminate(_Tokens, Scan) ->
+    {noreply, Scan}.
+
+%%%=====================================================================
+%%% Support functions
+%%%=====================================================================
+
+%% @doc Builds an expression token owned by this engine. The expression
+%% is stored as the token metadata.
+expr_token(Id, Anno, Expr) ->
+    bel_scan_token:new(#{
+        id => Id,
+        anno => Anno,
+        engine => ?MODULE,
+        metadata => Expr
+    }).
+
+%% @doc Marker/token id of an inline expression.
+expr_inline_id() ->
+    ?EXPR_INLINE_ID.
+ +expr_start_id() -> + ?EXPR_START_ID. + +expr_continue_id() -> + ?EXPR_CONTINUE_ID. + +expr_end_id() -> + ?EXPR_END_ID. + +comment_id() -> + ?EXPR_COMMENT_ID. + +%%%===================================================================== +%%% Internal functions +%%%===================================================================== + +% nothing here yet! + +%%%===================================================================== +%%% Tests +%%%===================================================================== + +-ifdef(TEST). +-compile([export_all, nowarn_export_all]). + +% NOTE: Just a test, not intended to have a valid HTML syntax. + +-define(SLINE, << +"a <%= b .%> c <%= d %> e <% f %> g <% h .%> i" +>>). + +-define(MLINE, <<"a <%= b +.%> c <%= +d %> e +<% f + +%> + + g + +<% + +h + + .%> + +i + +">>). + +scan_(Bin) -> + bel_scan:get_tokens(bel_scan:bin(Bin, #{ + engines => [ + ?MODULE, + {bel_scan_eng_html5, #{ + attr_engines => [ + bel_scan_eng_eel_attr, + bel_scan_eng_html5_attr + ] + }} + ] + })). + +scan_test() -> + [ { "Should scan attributes" + , ?assertEqual([ + {token,html_tag, + {anno,string, + {loc,0,1,1,1,1}, + {loc,35,1,36,1,1}, + <<"
foo
">>}, + {<<"div">>, + {[{token,eel_directive, + {anno,string, + {loc,5,1,6,1,1}, + {loc,16,1,17,1,1}, + <<":if={@bool}">>}, + {<<"if">>, + [{token,eel_expr_inline, + {anno,string, + {loc,10,1,11,1,1}, + {loc,15,1,16,1,1}, + <<":if={@bool}">>}, + <<"@bool">>,bel_scan_eng_eel}]}, + bel_scan_eng_eel_attr}, + {token,eel_attribute, + {anno,string, + {loc,17,1,18,1,1}, + {loc,25,1,26,1,1}, + <<"id={@id}">>}, + {<<"id">>, + [{token,eel_expr_inline, + {anno,string, + {loc,21,1,22,1,1}, + {loc,24,1,25,1,1}, + <<"id={@id}">>}, + <<"@id">>,bel_scan_eng_eel}]}, + bel_scan_eng_eel_attr}], + [{token,text, + {anno,string, + {loc,26,1,27,1,1}, + {loc,29,1,30,1,1}, + <<"foo">>}, + undefined,bel_scan}]}}, + bel_scan_eng_html5}, + {token,html_tag, + {anno,string, + {loc,35,1,36,1,1}, + {loc,63,1,64,1,1}, + <<"bar">>}, + {<<"span">>, + {[{token,html5_attribute, + {anno,string, + {loc,41,1,42,1,1}, + {loc,52,1,53,1,1}, + <<"class='foo'">>}, + {<<"class">>,<<"foo">>}, + bel_scan_eng_html5_attr}], + [{token,text, + {anno,string, + {loc,53,1,54,1,1}, + {loc,56,1,57,1,1}, + <<"bar">>}, + undefined,bel_scan}]}}, + bel_scan_eng_html5} + ], scan_(<<"
foo
bar">>))} + , { "Should scan single line" + , ?assertEqual([ + {token,text, + {anno,string, + {loc,0,1,1,1,1}, + {loc,2,1,3,1,1}, + <<"a ">>}, + undefined,bel_scan}, + {token,eel_expr_inline, + {anno,string, + {loc,2,1,3,1,1}, + {loc,11,1,12,1,1}, + <<"<%= b .%>">>}, + <<"b">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,11,1,12,1,1}, + {loc,14,1,15,1,1}, + <<" c ">>}, + undefined,bel_scan}, + {token,eel_expr_start, + {anno,string, + {loc,14,1,15,1,1}, + {loc,22,1,23,1,1}, + <<"<%= d %>">>}, + <<"d">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,22,1,23,1,1}, + {loc,25,1,26,1,1}, + <<" e ">>}, + undefined,bel_scan}, + {token,eel_expr_continue, + {anno,string, + {loc,25,1,26,1,1}, + {loc,32,1,33,1,1}, + <<"<% f %>">>}, + <<"f">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,32,1,33,1,1}, + {loc,35,1,36,1,1}, + <<" g ">>}, + undefined,bel_scan}, + {token,eel_expr_end, + {anno,string, + {loc,35,1,36,1,1}, + {loc,43,1,44,1,1}, + <<"<% h .%>">>}, + <<"h">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,43,1,44,1,1}, + {loc,45,1,46,1,1}, + <<" i">>}, + undefined,bel_scan} + ], scan_(?SLINE))} + , { "Should scan multiple lines" + , ?assertEqual([ + {token,text, + {anno,string, + {loc,0,1,1,1,1}, + {loc,2,1,3,1,1}, + <<"a ">>}, + undefined,bel_scan}, + {token,eel_expr_inline, + {anno,string, + {loc,2,1,3,1,1}, + {loc,11,2,4,1,1}, + <<"<%= b\n.%>">>}, + <<"b">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,11,2,4,1,1}, + {loc,14,2,7,1,1}, + <<" c ">>}, + undefined,bel_scan}, + {token,eel_expr_start, + {anno,string, + {loc,14,2,7,1,1}, + {loc,22,3,5,1,1}, + <<"<%=\nd %>">>}, + <<"d">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,22,3,5,1,1}, + {loc,25,4,1,1,1}, + <<" e\n">>}, + undefined,bel_scan}, + {token,eel_expr_continue, + {anno,string, + {loc,25,4,1,1,1}, + {loc,33,6,3,1,1}, + <<"<% f\n\n%>">>}, + <<"f">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,33,6,3,1,1}, + {loc,41,10,1,1,1}, + 
<<"\n\n g\n\n">>}, + undefined,bel_scan}, + {token,eel_expr_end, + {anno,string, + {loc,41,10,1,1,1}, + {loc,60,14,13,1,1}, + <<"<%\n\nh\n\n .%>">>}, + <<"h">>,bel_scan_eng_eel}, + {token,text, + {anno,string, + {loc,60,14,13,1,1}, + {loc,65,18,1,1,1}, + <<"\n\ni\n\n">>}, + undefined,bel_scan} + ], scan_(?MLINE))} + ]. + +-endif. diff --git a/src/bel_scan_eng_eel_attr.erl b/src/bel_scan_eng_eel_attr.erl new file mode 100644 index 0000000..56f249e --- /dev/null +++ b/src/bel_scan_eng_eel_attr.erl @@ -0,0 +1,106 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc EEl attributes engine module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_eng_eel_attr). +-behaviour(bel_scan_eng). + +% bel_scan_eng callback functions +-export([ init/1 + , handle_start/2 + , handle_text/2 + , handle_match/2 + , handle_terminate/2 + ]). + +-include("bel_scan_eng.hrl"). + +-define(DIRECTIVES, [<<"if">>, <<"let">>, <<"for">>]). 
+
+%%%=====================================================================
+%%% bel_scan_eng callback functions
+%%%=====================================================================
+
+%% @doc Builds the EEl attribute engine. Options are ignored.
+%%
+%% Two markers are declared, both capturing a name and the expression
+%% found between curly braces:
+%% - eel_attribute: name={Expr}
+%% - eel_directive: :name={Expr}, where name is one of ?DIRECTIVES
+init(_Opts) ->
+    #engine{
+        markers = [
+            #marker{
+                id = eel_attribute,
+                % NOTE(review): "(?:(?!:))" is a zero-width look-ahead
+                % placed right before "\\w+". Since "\\w" never matches
+                % ":" it looks like a no-op; a look-behind "(?<!:)" may
+                % have been intended. Confirm against the scanner's
+                % matching strategy before changing it.
+                re = "(?:(?!:))(\\w+)=\\{(.*?[^\\\\}])\\}"
+            },
+            #marker{
+                id = eel_directive,
+                % iodata pattern: the alternation is generated from the
+                % ?DIRECTIVES binaries.
+                re = [":(", lists:join("|", ?DIRECTIVES), ")=\\{(.*?[^\\\\}])\\}"]
+            }
+        ]
+    }.
+
+%% @doc No-op: the scan state is returned untouched.
+handle_start(_Bin, Scan) ->
+    {noreply, Scan}.
+
+%% @doc Always answers `halt' for plain text.
+handle_text(_Text, Scan) ->
+    {halt, Scan}.
+
+%% @doc Turns an attribute or directive match of this engine into a
+%% token. Matches owned by other engines are ignored.
+%%
+%% Raises error({invalid_directive, K}) when a directive name is not
+%% listed in ?DIRECTIVES.
+handle_match({?MODULE, eel_attribute, [K, Expr], Anno}, Scan) ->
+    {reply, [attribute_token(Anno, {K, Expr})], Scan};
+handle_match({?MODULE, eel_directive, [K, Expr], Anno}, Scan) ->
+    case lists:member(K, ?DIRECTIVES) of
+        true ->
+            {reply, [directive_token(Anno, {K, Expr})], Scan};
+        false ->
+            error({invalid_directive, K})
+    end;
+handle_match({Mod, _, _, _}, Scan) when Mod =/= ?MODULE ->
+    {noreply, Scan}.
+
+%% @doc No-op: the scan state is returned untouched.
+handle_terminate(_Tokens, Scan) ->
+    {noreply, Scan}.
+
+%%%=====================================================================
+%%% Internal functions
+%%%=====================================================================
+
+% Builds an eel_attribute token. The expression location is obtained by
+% reading the `name={` prefix from the start of the match.
+attribute_token(Anno, {K, Expr}) ->
+    InitLoc = bel_scan_anno:get_loc(Anno),
+    Loc = bel_scan_loc:read(<<K/binary, "={">>, InitLoc),
+    token(eel_attribute, Loc, Anno, {K, Expr}).
+
+% Builds an eel_directive token. Same as attribute_token/2, with the
+% extra leading colon of the `:name={` prefix.
+directive_token(Anno, {K, Expr}) ->
+    InitLoc = bel_scan_anno:get_loc(Anno),
+    Loc = bel_scan_loc:read(<<$:, K/binary, "={">>, InitLoc),
+    token(eel_directive, Loc, Anno, {K, Expr}).
+
+% Builds the outer token. Its metadata is {Name, ExprTokens}; the outer
+% token keeps the annotation of the whole match.
+token(Id, Loc, Anno, {K, Expr}) ->
+    bel_scan_token:new(#{
+        id => Id,
+        anno => Anno,
+        metadata => {K, expr_tokens(Loc, Anno, Expr)},
+        engine => ?MODULE
+    }).
+
+% Wraps the expression into a single inline expression token of
+% bel_scan_eng_eel. Its annotation starts at Loc and ends where reading
+% Expr from Loc finishes.
+expr_tokens(Loc, Anno0, Expr) ->
+    Anno1 = bel_scan_anno:set_loc(Loc, Anno0),
+    EndLoc = bel_scan_loc:read(Expr, Loc),
+    Anno = bel_scan_anno:set_end_loc(EndLoc, Anno1),
+    [bel_scan_eng_eel:expr_token(
+        bel_scan_eng_eel:expr_inline_id(),
+        Anno,
+        Expr
+    )].
diff --git a/src/bel_scan_eng_html5.erl b/src/bel_scan_eng_html5.erl new file mode 100644 index 0000000..d5f3471 --- /dev/null +++ b/src/bel_scan_eng_html5.erl @@ -0,0 +1,449 @@ +%%%--------------------------------------------------------------------- +%%% @copyright 2024 William Fank Thomé +%%% @author William Fank Thomé +%%% @doc HTML5 engine module. +%%% +%%% Copyright 2024 William Fank Thomé +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% @end +%%%--------------------------------------------------------------------- +-module(bel_scan_eng_html5). +-behaviour(bel_scan_eng). + +% bel_scan_eng callback functions +-export([ init/1 + , handle_start/2 + , handle_text/2 + , handle_match/2 + , handle_terminate/2 + ]). + +-include("bel_scan_eng.hrl"). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-define(SPECIAL_TAG, "(script|style|textarea|title)"). + +-define(VOID_TAG, + "(area|base|br|col|embed|hr|img|input|" + "link|meta|param|source|track|wbr)" +). + +-define(ELEM_TAG, "(\\w+)"). + +-define(OPEN_TAG, "(<)"). + +-define(CLOSE_TAG, "(>)"). + +-define(CLOSE_VOID, "(\\/?>)"). + +-define(CLOSING_TAG, "(<\\/(?2)>)"). + +-define(ATTRS, "(.*?)"). + +-define(CONTENT, "(.*?)"). + +-define(CHILD_NODES, "((?:(?R)|(?:(?!<\\/?(?2)).*?))*)"). + +-define(ATTRS_ENGINES, [bel_scan_eng_html5_attr]). + +-record(state, { attr_engines :: [module()] }). 
+ +%%%===================================================================== +%%% bel_scan_eng callback functions +%%%===================================================================== + +init(Opts) -> + #engine{ + % TODO: Use "simple" markers and use a parser (yecc) to + % spot issues, like missing ending marker, e.g.: + % > "
" <- Missing "
" + markers = [ + #marker{ + id = doctype, + re = <<"">> + }, + #marker{ + id = special_tag, + re = <<"(?s)" + ?OPEN_TAG ?SPECIAL_TAG ?ATTRS ?CLOSE_TAG + ?CONTENT + ?CLOSING_TAG + >> + }, + #marker{ + id = void_tag, + re = << + ?OPEN_TAG ?VOID_TAG ?ATTRS ?CLOSE_VOID + >> + }, + #marker{ + id = elem_tag, + re = <<"(?s)" + ?OPEN_TAG ?ELEM_TAG ?ATTRS ?CLOSE_TAG + ?CHILD_NODES + ?CLOSING_TAG + >> + }, + #marker{ + id = comment, + re = "