diff --git a/.gitignore b/.gitignore index 1fddf79..2289878 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,8 @@ /doc/* *.trace *~ +_build/ +rebar.lock +__pycache__/ +src/calc_lexer.erl +src/calc_parser.erl diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..5f6422f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +FROM erlang:26 +WORKDIR /usr/src/mathex +COPY . . +RUN rebar3 compile +EXPOSE 4040 +CMD ["erl","-noshell","-pa","_build/default/lib/mathex/ebin","-eval","mathex_mcp_server:start(), timer:sleep(infinity)"] diff --git a/README.md b/README.md index c809894..93c919f 100644 --- a/README.md +++ b/README.md @@ -1,54 +1,93 @@ mathex ====== -[![Build Status](https://api.travis-ci.org/nisbus/mathex.png?branch=master)](https://api.travis-ci.org/nisbus/mathex.png?branch=master) +[![Build Status](https://api.travis-ci.org/nisbus/mathex.png?branch=master)](https://api.travis-ci.org/nisbus/mathex.png?branch=master) -Extra math functions for Erlang +Extra math and statistics functions for Erlang. +The library provides calculations such as moving averages, +standard deviation, skewness, kurtosis and correlation. + +The API can be used standalone from any Erlang application. +All functions operate on lists of numbers and return either a single +value or a list of values. This library contains the following functions: -## Return a single value - -* average -* sum -* stdev_sample -* stdev_population -* skew -* kurtosis -* variance -* covariance - -## Return a list of values - -* moving_average -* correlation -* correlation_matrix - -Most of these are well documented elsewhere except maybe the -correlation_matrix. -The correlation matrix will take in a list of numeric lists -and correlate each list in the collection with all the other lists -of the collection. -The result will look like this: - +## Return a single value + +* average +* sum +* stdev_sample +* stdev_population +* skew +* kurtosis +* variance +* covariance + +## Return a list of values + +* moving_average +* correlation +* correlation_matrix + +In addition to statistics, mathex can parse and evaluate simple algebraic +expressions. See `mathex_calc:eval/1` for evaluating a string expression and +`mathex_calc:calculate/1` for applying an expression to a list of dated values. + +Most of these are well documented elsewhere except maybe the +correlation_matrix. +The correlation matrix will take in a list of numeric lists +and correlate each list in the collection with all the other lists +of the collection. +The result will look like this: + ```erlang [{integer(),integer(),float()}, {integer(),integer(),float()}, {integer(),integer(),float()}, {integer(),integer(),float()}] ``` - -Where the first int is the index of a list in the collection and the -second integer the index of the list it's being correlated to. -The float is the correlation of the two. -It will give you all possible combinations and their correlation. - - -# To Do -Add a sort function to return the correlation_matrix sorted desc/asc. -Add more functions. -Add unit tests. -Add more documentation. - - -## Documentation + +Where the first int is the index of a list in the collection and the +second integer the index of the list it's being correlated to. +The float is the correlation of the two. +It will give you all possible combinations and their correlation. + +# To Do +Add a sort function to return the correlation_matrix sorted desc/asc. +Add more functions. +Add unit tests. +Add more documentation. + +## MCP server and client + +This project exposes the statistics and calculator functions over a minimal +line-based TCP protocol. The easiest way to run the server is via Docker: + +```bash +docker build -t mathex . +docker run -p 4040:4040 mathex +``` + +Once running, you can query the server without any Erlang knowledge by using +the provided Python script: + +```bash +python3 tools/mcp_client.py sum 1 2 3 +python3 tools/mcp_client.py calc "5+5" +``` + +The client connects to `localhost:4040` by default and prints the result of the +requested function or expression. + +## Running tests + +EUnit tests are included and can be executed with: + +```bash +rebar3 eunit +``` + +The tests cover the public API and a few edge cases. + +## Documentation Check out the [gh-pages](http://nisbus.github.com/mathex) diff --git a/src/calc_lexer.xrl b/src/calc_lexer.xrl new file mode 100644 index 0000000..081ed46 --- /dev/null +++ b/src/calc_lexer.xrl @@ -0,0 +1,32 @@ +Definitions. + +Digit = [0-9] +WS = ([\000-\s]|%.*) +Plus = \+ +Minus = \- +Multiply = \* +Divide = \/ +Power = \^ +Function = [A-Za-z] +Open = \( +Close = \) +Op = [\+|\^|\*|\/] + +Rules. + +\-?{Digit}+ : {token, {float, TokenLine, convert_to_float(TokenChars)}}. +\-?{Digit}+\.{Digit}+((E|e)(\+|\-)?{D}+)? : {token, {float, TokenLine, list_to_float(TokenChars)}}. +{Plus} : {token, {add, TokenLine, list_to_atom(TokenChars)}}. +{Minus} : {token, {minus, TokenLine, list_to_atom(TokenChars)}}. +{Multiply} : {token, {multiply, TokenLine, list_to_atom(TokenChars)}}. +{Divide} : {token, {divide, TokenLine, list_to_atom(TokenChars)}}. +{Power} : {token, {power, TokenLine, list_to_atom(TokenChars)}}. +{Open} : {token, {open, TokenLine, list_to_atom(TokenChars)}}. +{Close} : {token, {close, TokenLine, list_to_atom(TokenChars)}}. +{Function}+ : {token, {fn, TokenLine, list_to_atom(TokenChars)}}. +{WS}+ : skip_token. + +Erlang code. + +convert_to_float(List) -> + list_to_float(List++".0000000"). diff --git a/src/calc_parser.yrl b/src/calc_parser.yrl new file mode 100644 index 0000000..3b12353 --- /dev/null +++ b/src/calc_parser.yrl @@ -0,0 +1,95 @@ +Nonterminals + eval expr term factor. + +Terminals + open close add minus multiply divide float power fn. + +Rootsymbol eval. + +Left 100 float. +Left 200 open. +Left 300 close. +Left 400 fn. +Left 500 add. +Left 600 minus. +Left 700 multiply. +Left 800 divide. +Left 900 power. + +eval -> expr : '$1'. + +expr -> expr add term : add('$1','$3'). +expr -> expr minus term : subtract('$1','$3'). +expr -> term : '$1'. + +term -> term multiply factor : multiply('$1','$3'). +term -> term divide factor : divide('$1','$3'). +term -> term power factor : power('$1','$3'). +term -> term fn factor : unwrap('$3'). +term -> factor : '$1'. + +factor -> float : unwrap('$1'). +factor -> open expr close : '$2'. +factor -> fn factor : math(unwrap('$1'),'$2'). +factor -> fn : math(unwrap('$1')). + +Erlang code. + + + +unwrap({_,_,V}) -> V; +unwrap({_,V}) -> V; +unwrap(V) -> V. + +add(A,B) -> + A+B. +subtract(A,B) -> + A-B. +divide(A,B) -> + A/B. +multiply(A,B) -> + A*B. +power(A,B) -> + math:pow(A,B). +math(pi) -> + math:pi(); +math(Other) -> + io:format("Unknown operator ~p~n",[Other]), + 0.0. +math(sqrt,A) -> + math:sqrt(A); +math(sin,A) -> + math:sin(A); +math(cos,A) -> + math:cos(A); +math(tan,A) -> + math:tan(A); +math(asin,A) -> + math:asin(A); +math(acos,A) -> + math:acos(A); +math(atan,A) -> + math:atan(A); +math(sinh,A) -> + math:sinh(A); +math(cosh,A) -> + math:cosh(A); +math(tanh,A) -> + math:tanh(A); +math(asinh,A) -> + math:asinh(A); +math(acosh,A) -> + math:acosh(A); +math(atanh,A) -> + math:atanh(A); +math(exp,A) -> + math:exp(A); +math(log,A) -> + math:log(A); +math(erf,A) -> + math:erf(A); +math(erfc,A) -> + math:erfc(A); +math(Other,_A) -> + io:format("Unknown operator ~p~n",[Other]), + 0.0. diff --git a/src/mathex.erl b/src/mathex.erl index 46e4641..138c500 100644 --- a/src/mathex.erl +++ b/src/mathex.erl @@ -12,13 +12,14 @@ %% API -export([moving_average/1,average/1,sum/1, - stdev_sample/1,stdev_population/1, - skew/1,kurtosis/1,variance/1, - covariance/2,correlation/2,correlation_matrix/1]). + stdev_sample/1,stdev_population/1, + skew/1,kurtosis/1,variance/1, + covariance/2,correlation/2,correlation_matrix/1]). %%%=================================================================== %%% API %%%=================================================================== -spec moving_average(Data :: [] | [float()|integer]) -> []|[float()]. +%% @doc Calculate the cumulative moving average for `Data`. moving_average([]) -> []; moving_average([H|_] = Data) when is_list(Data) -> @@ -34,17 +35,23 @@ moving_average([H|_] = Data) when is_list(Data) -> lists:reverse(Res). -spec stdev_sample(Data :: [] | [float()|integer()]) -> float(). +%% @doc Sample standard deviation using n-1 in the denominator. stdev_sample([])-> 0.0; stdev_sample(Data) when is_list(Data) -> - C = length(Data), - A = average(Data), - math:sqrt(lists:foldl(fun(X,Acc) -> - Acc+math:pow(X-A,2)/ (C-1) - end, 0,Data)). + case length(Data) of + N when N < 2 -> + 0.0; + C -> + A = average(Data), + math:sqrt(lists:foldl(fun(X,Acc) -> + Acc+math:pow(X-A,2)/(C-1) + end, 0, Data)) + end. -spec stdev_population(Data :: [] | [float()|integer()]) -> float(). -stdev_population([]) -> +%% @doc Population standard deviation. +stdev_population([]) -> 0.0; stdev_population(Data) when is_list(Data) -> A = average(Data), @@ -53,31 +60,42 @@ stdev_population(Data) when is_list(Data) -> end,Data))). -spec skew(Data :: [] | [float()|integer()]) -> float(). +%% @doc Unbiased estimator of skewness. skew([])-> 0.0; skew(Data) when is_list(Data) -> - A = average(Data), C = length(Data), - Std = stdev_sample(Data), - Mult = (C)/((C-1)*(C-2)), - sum(lists:map(fun(X) -> - math:pow(((X-A)/Std),3) - end,Data))*Mult. + case C < 3 of + true -> 0.0; + false -> + A = average(Data), + Std = stdev_sample(Data), + Mult = C / ((C-1)*(C-2)), + sum(lists:map(fun(X) -> + math:pow(((X-A)/Std),3) + end, Data)) * Mult + end. -spec kurtosis(Data :: [] | [float()|integer()]) -> float(). +%% @doc Unbiased estimator of kurtosis. kurtosis([]) -> 0.0; kurtosis(Data) when is_list(Data) -> - A = average(Data), C = length(Data), - Std = stdev_sample(Data), - Mult = ((C) * (C+1)) / ((C-1) * (C -2) * (C - 3)), - Sub = 3 * (math:pow(C-1,2)) / ((C-2) * (C -3)), - Mult * sum(lists:map(fun(X) -> - math:pow( ((X-A)/Std),4) - end,Data))-Sub. + case C < 4 of + true -> 0.0; + false -> + A = average(Data), + Std = stdev_sample(Data), + Mult = (C * (C+1)) / ((C-1) * (C-2) * (C-3)), + Sub = 3 * math:pow(C-1,2) / ((C-2) * (C-3)), + Mult * sum(lists:map(fun(X) -> + math:pow(((X-A)/Std),4) + end, Data)) - Sub + end. -spec variance(Data :: [] | [float()|integer()]) -> float(). +%% @doc Population variance. variance([]) -> 0.0; variance(Data) when is_list(Data) -> @@ -88,26 +106,48 @@ variance(Data) when is_list(Data) -> end,Data))/C. -spec covariance(Xs :: [] | [float()|integer()],Ys :: [] | [float()|integer()]) -> float(). +%% @doc Sample covariance of two data sets. Lists of unequal length are truncated to the shortest. covariance([],_) -> 0.0; covariance(_,[]) -> 0.0; covariance(Xs,Ys) when is_list(Xs) and is_list(Ys) -> - AveX = average(Xs), - AveY = average(Ys), - sum(lists:zipwith(fun(X,Y)-> - (X-AveX)*(Y-AveY) - end,Xs,Ys))/(length(Xs)-1). + Pairs = lists:zip(Xs,Ys), + case length(Pairs) of + N when N < 2 -> 0.0; + Len -> + X1 = [X || {X,_} <- Pairs], + Y1 = [Y || {_,Y} <- Pairs], + AveX = average(X1), + AveY = average(Y1), + sum([ (X-AveX)*(Y-AveY) || {X,Y} <- Pairs ])/(Len-1) + end. -spec correlation(Xs :: [] | [float()|integer()],Ys :: [] | [float()|integer()]) -> float(). +%% @doc Pearson correlation coefficient for two data sets. correlation([],_) -> 0.0; correlation(_,[]) -> 0.0; -correlation(Xs,Ys) -> - covariance(Xs,Ys) /(stdev_sample(Xs)*stdev_sample(Ys)). +correlation(Xs,Ys) when is_list(Xs) and is_list(Ys) -> + Pairs = lists:zip(Xs,Ys), + case length(Pairs) of + N when N < 2 -> 0.0; + _ -> + X1 = [X || {X,_} <- Pairs], + Y1 = [Y || {_,Y} <- Pairs], + StdX = stdev_sample(X1), + StdY = stdev_sample(Y1), + case StdX =:= 0.0 orelse StdY =:= 0.0 of + true -> 0.0; + false -> + Cov = covariance(X1,Y1), + Cov/(StdX*StdY) + end + end. -spec correlation_matrix(ListOfLists :: [] | [[float()|integer()]]) -> [{integer(),integer(),float()}]. +%% @doc Correlate each list in `ListOfLists` with the others. correlation_matrix([]) -> []; correlation_matrix(ListOfLists) -> @@ -122,12 +162,14 @@ correlation_matrix(ListOfLists) -> end,R))). -spec average(Data :: [] | [float()|integer()]) -> float(). +%% @doc Arithmetic mean of the list. average([]) -> 0.0; average(Data) -> sum(Data)/length(Data). -spec sum(Data :: [] | [float()|integer()]) -> float()|integer(). +%% @doc Sum of all elements in `Data`. sum([]) -> 0.0; sum(Data) -> diff --git a/src/mathex_calc.erl b/src/mathex_calc.erl new file mode 100644 index 0000000..2d7c352 --- /dev/null +++ b/src/mathex_calc.erl @@ -0,0 +1,29 @@ +%%%------------------------------------------------------------------- +%%% @doc +%%% Evaluates string algebra expressions and simple formulas for +%%% timeseries data. Ported from babelstats project. +%%% @end +%%%------------------------------------------------------------------- +-module(mathex_calc). + +%% API +-export([eval/1, calculate/1]). + +%%-------------------------------------------------------------------- +%% @doc Evaluate an algebraic expression represented as a string. +-spec eval(string()) -> float(). +%%-------------------------------------------------------------------- +eval(Algebra) -> + {ok, Ts, _} = calc_lexer:string(Algebra), + {ok, Result} = calc_parser:parse(Ts), + Result. + +%%-------------------------------------------------------------------- +%% @doc Evaluate a list of {Date, Expression} tuples. +-spec calculate([{calendar:datetime(), string()}]) -> + [{calendar:datetime(), float()}]. +%%-------------------------------------------------------------------- +calculate(Series) -> + lists:map(fun({Date, Expr}) -> + {Date, eval(Expr)} + end, Series). diff --git a/src/mathex_mcp_client.erl b/src/mathex_mcp_client.erl new file mode 100644 index 0000000..4e07ac0 --- /dev/null +++ b/src/mathex_mcp_client.erl @@ -0,0 +1,56 @@ +%% Mathex MCP Client +-module(mathex_mcp_client). + +-export([call/1, call/2, call/3]). + +-define(DEFAULT_HOST, "localhost"). +-define(DEFAULT_PORT, 4040). + +call(Term) -> + call(?DEFAULT_HOST, ?DEFAULT_PORT, Term). + +call(Port, Term) when is_integer(Port) -> + call(?DEFAULT_HOST, Port, Term); +call(Host, Term) when is_list(Host); is_atom(Host) -> + call(Host, ?DEFAULT_PORT, Term). + +call(Host, Port, Term) -> + Options = [binary, {packet, line}], + case gen_tcp:connect(Host, Port, Options) of + {ok, Socket} -> + Request = io_lib:format("~p.~n", [Term]), + gen_tcp:send(Socket, Request), + case gen_tcp:recv(Socket, 0) of + {ok, Line} -> + gen_tcp:close(Socket), + parse_response(binary_to_list(Line)); + {error, Reason} -> + gen_tcp:close(Socket), + {error, Reason} + end; + Error -> Error + end. + +parse_response(Str) -> + Trim = string:trim(Str), + case string:prefix(Trim, "error:") of + true -> + ErrStr = string:trim(string:substr(Trim, 7)), + case parse_term(ErrStr) of + {ok, Term} -> {error, Term}; + Other -> Other + end; + false -> + parse_term(Trim) + end. + +parse_term(Text) -> + case erl_scan:string(Text) of + {ok, Tokens, _} -> + case erl_parse:parse_term(Tokens) of + {ok, Term} -> {ok, Term}; + Error -> {error, Error} + end; + Error -> {error, Error} + end. + diff --git a/src/mathex_mcp_server.erl b/src/mathex_mcp_server.erl new file mode 100644 index 0000000..6ddd8fe --- /dev/null +++ b/src/mathex_mcp_server.erl @@ -0,0 +1,67 @@ +%% Mathex MCP Server +-module(mathex_mcp_server). + +-export([start/0, start/1, stop/0]). + +%% simple line based protocol: each request is an Erlang term ending with '.' +%% Example: {sum,[1,2,3]}. + +-define(DEFAULT_PORT, 4040). + +start() -> + start(?DEFAULT_PORT). + +start(Port) when is_integer(Port) -> + {ok, Listen} = gen_tcp:listen(Port, [binary, {packet, line}, + {active, false}, {reuseaddr, true}]), + spawn(fun() -> accept(Listen) end), + {ok, Listen}. + +stop() -> + ok. + +accept(Listen) -> + case gen_tcp:accept(Listen) of + {ok, Socket} -> + spawn(fun() -> accept(Listen) end), + loop(Socket); + {error, _} -> + ok + end. + +loop(Socket) -> + case gen_tcp:recv(Socket, 0) of + {ok, Line} -> + Response = handle_request(Line), + gen_tcp:send(Socket, Response ++ "\n"), + loop(Socket); + {error, closed} -> + ok + end. + +handle_request(Line) -> + Str = binary_to_list(Line), + case parse_term(Str) of + {ok, {calc, Expr}} -> + Result = (catch mathex_calc:eval(Expr)), + io_lib:format("~p", [Result]); + {ok, {calculate, Series}} -> + Result = (catch mathex_calc:calculate(Series)), + io_lib:format("~p", [Result]); + {ok, {Func, Args}} -> + Result = (catch apply(mathex, Func, Args)), + io_lib:format("~p", [Result]); + {error, Reason} -> + io_lib:format("error: ~p", [Reason]) + end. + +parse_term(Str) -> + case erl_scan:string(Str) of + {ok, Tokens, _} -> + case erl_parse:parse_term(Tokens) of + {ok, Term} -> {ok, Term}; + Error -> {error, Error} + end; + Error -> {error, Error} + end. + diff --git a/test/mathex_tests.erl b/test/mathex_tests.erl new file mode 100644 index 0000000..e424a4f --- /dev/null +++ b/test/mathex_tests.erl @@ -0,0 +1,50 @@ +-module(mathex_tests). +-include_lib("eunit/include/eunit.hrl"). + +moving_average_test() -> + ?assertEqual([1,1.5,2.0], mathex:moving_average([1,2,3])). + +average_test() -> + ?assertEqual(2.0, mathex:average([1,2,3])). + +sum_test() -> + ?assertEqual(6, mathex:sum([1,2,3])). + +stdev_sample_test() -> + R = mathex:stdev_sample([2,4,4,4,5,5,7,9]), + ?assert(abs(R - 2.138089935299395) < 1.0e-6). + +stdev_population_test() -> + R = mathex:stdev_population([2,4,4,4,5,5,7,9]), + ?assert(abs(R - 2.0) < 1.0e-6). + +variance_test() -> + R = mathex:variance([1,2,3,4]), + ?assert(abs(R - 1.25) < 1.0e-6). + +covariance_test() -> + R = mathex:covariance([2.1,2.5,4.0,3.6],[8,12,14,10]), + ?assert(abs(R - 1.5333333333333332) < 1.0e-6). + +correlation_test() -> + R = mathex:correlation([2.1,2.5,4.0,3.6],[8,12,14,10]), + ?assert(abs(R - 0.6625738822030289) < 1.0e-6). + +correlation_matrix_test() -> + R = mathex:correlation_matrix([[1,2,3],[2,3,4]]), + ?assertEqual([{1,2,1.0},{2,1,1.0}], R). + +%% edge case tests +single_element_safe_test() -> + ?assertEqual(0.0, mathex:stdev_sample([1])), + ?assertEqual(0.0, mathex:covariance([1],[1])), + ?assertEqual(0.0, mathex:correlation([1],[1])). + +calc_eval_test() -> + ?assertEqual(10.0, mathex_calc:eval("5+5")). + +calc_series_test() -> + D1 = {{2024,1,1},{0,0,0}}, + D2 = {{2024,1,2},{0,0,0}}, + Series = mathex_calc:calculate([{D1,"1+1"},{D2,"2*3"}]), + ?assertEqual([{D1,2.0},{D2,6.0}], Series). diff --git a/tools/mcp_client.py b/tools/mcp_client.py new file mode 100755 index 0000000..10c02c0 --- /dev/null +++ b/tools/mcp_client.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +import argparse +import socket +import sys + + +def parse_args(): + p = argparse.ArgumentParser(description="Mathex MCP client") + p.add_argument("function", help="Mathex function to call") + p.add_argument("args", nargs="*", help="Arguments to the function") + p.add_argument("--host", default="localhost", help="Server host") + p.add_argument("--port", type=int, default=4040, help="Server port") + return p.parse_args() + + +def main(): + opts = parse_args() + if opts.function == "calc": + expr = " ".join(opts.args) + term = f"{{calc,\"{expr}\"}}.\n" + else: + numbers = [] + for a in opts.args: + try: + if "." in a: + numbers.append(float(a)) + else: + numbers.append(int(a)) + except ValueError: + sys.stderr.write(f"invalid number: {a}\n") + return 1 + term = f"{{{opts.function},[{','.join(map(str, numbers))}]}}.\n" + with socket.create_connection((opts.host, opts.port)) as s: + s.sendall(term.encode()) + resp = s.recv(4096).decode().strip() + print(resp) + return 0 + + +if __name__ == "__main__": + sys.exit(main())