Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@ mathex
======
[![Build Status](https://api.travis-ci.org/nisbus/mathex.png?branch=master)](https://api.travis-ci.org/nisbus/mathex.png?branch=master)

Extra math functions for Erlang
Extra math and statistics functions for Erlang.
The library provides calculations such as moving averages,
standard deviation, skewness, kurtosis and correlation.

The API can be used standalone from any Erlang application.
All functions operate on lists of numbers and return either a single
value or a list of values.

This library contains the following functions:

Expand Down Expand Up @@ -43,11 +49,16 @@ The float is the correlation of the two.
It will give you all possible combinations and their correlation.


# To Do
Add a sort function to return the correlation_matrix sorted desc/asc.
Add more functions.
Add unit tests.
Add more documentation.

## Running tests

EUnit tests are included and can be executed with:

```bash
rebar3 eunit
```

The tests cover the public API and a few edge cases.


## Documentation
Expand Down
100 changes: 71 additions & 29 deletions src/mathex.erl
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@

%% API
-export([moving_average/1,average/1,sum/1,
stdev_sample/1,stdev_population/1,
skew/1,kurtosis/1,variance/1,
covariance/2,correlation/2,correlation_matrix/1]).
stdev_sample/1,stdev_population/1,
skew/1,kurtosis/1,variance/1,
covariance/2,correlation/2,correlation_matrix/1]).
%%%===================================================================
%%% API
%%%===================================================================
-spec moving_average(Data :: [] | [float()|integer]) -> []|[float()].
%% @doc Calculate the cumulative moving average for `Data`.
moving_average([]) ->
[];
moving_average([H|_] = Data) when is_list(Data) ->
Expand All @@ -34,17 +35,23 @@ moving_average([H|_] = Data) when is_list(Data) ->
lists:reverse(Res).

-spec stdev_sample(Data :: [] | [float()|integer()]) -> float().
%% @doc Sample standard deviation using n-1 in the denominator.
stdev_sample([])->
0.0;
stdev_sample(Data) when is_list(Data) ->
C = length(Data),
A = average(Data),
math:sqrt(lists:foldl(fun(X,Acc) ->
Acc+math:pow(X-A,2)/ (C-1)
end, 0,Data)).
case length(Data) of
N when N < 2 ->
0.0;
C ->
A = average(Data),
math:sqrt(lists:foldl(fun(X,Acc) ->
Acc+math:pow(X-A,2)/(C-1)
end, 0, Data))
end.

-spec stdev_population(Data :: [] | [float()|integer()]) -> float().
stdev_population([]) ->
%% @doc Population standard deviation.
stdev_population([]) ->
0.0;
stdev_population(Data) when is_list(Data) ->
A = average(Data),
Expand All @@ -53,31 +60,42 @@ stdev_population(Data) when is_list(Data) ->
end,Data))).

-spec skew(Data :: [] | [float()|integer()]) -> float().
%% @doc Unbiased estimator of skewness.
skew([])->
0.0;
skew(Data) when is_list(Data) ->
A = average(Data),
C = length(Data),
Std = stdev_sample(Data),
Mult = (C)/((C-1)*(C-2)),
sum(lists:map(fun(X) ->
math:pow(((X-A)/Std),3)
end,Data))*Mult.
case C < 3 of
true -> 0.0;
false ->
A = average(Data),
Std = stdev_sample(Data),
Mult = C / ((C-1)*(C-2)),
sum(lists:map(fun(X) ->
math:pow(((X-A)/Std),3)
end, Data)) * Mult
end.

-spec kurtosis(Data :: [] | [float()|integer()]) -> float().
%% @doc Unbiased estimator of kurtosis.
kurtosis([]) ->
0.0;
kurtosis(Data) when is_list(Data) ->
A = average(Data),
C = length(Data),
Std = stdev_sample(Data),
Mult = ((C) * (C+1)) / ((C-1) * (C -2) * (C - 3)),
Sub = 3 * (math:pow(C-1,2)) / ((C-2) * (C -3)),
Mult * sum(lists:map(fun(X) ->
math:pow( ((X-A)/Std),4)
end,Data))-Sub.
case C < 4 of
true -> 0.0;
false ->
A = average(Data),
Std = stdev_sample(Data),
Mult = (C * (C+1)) / ((C-1) * (C-2) * (C-3)),
Sub = 3 * math:pow(C-1,2) / ((C-2) * (C-3)),
Mult * sum(lists:map(fun(X) ->
math:pow(((X-A)/Std),4)
end, Data)) - Sub
end.

-spec variance(Data :: [] | [float()|integer()]) -> float().
%% @doc Population variance.
variance([]) ->
0.0;
variance(Data) when is_list(Data) ->
Expand All @@ -88,26 +106,48 @@ variance(Data) when is_list(Data) ->
end,Data))/C.

-spec covariance(Xs :: [] | [float()|integer()],Ys :: [] | [float()|integer()]) -> float().
%% @doc Sample covariance of two data sets. Lists of unequal length are truncated to the shortest.
covariance([],_) ->
0.0;
covariance(_,[]) ->
0.0;
covariance(Xs,Ys) when is_list(Xs) and is_list(Ys) ->
AveX = average(Xs),
AveY = average(Ys),
sum(lists:zipwith(fun(X,Y)->
(X-AveX)*(Y-AveY)
end,Xs,Ys))/(length(Xs)-1).
Pairs = lists:zip(Xs,Ys),
case length(Pairs) of
N when N < 2 -> 0.0;
Len ->
X1 = [X || {X,_} <- Pairs],
Y1 = [Y || {_,Y} <- Pairs],
AveX = average(X1),
AveY = average(Y1),
sum([ (X-AveX)*(Y-AveY) || {X,Y} <- Pairs ])/(Len-1)
end.

-spec correlation(Xs :: [] | [float()|integer()],Ys :: [] | [float()|integer()]) -> float().
%% @doc Pearson correlation coefficient for two data sets.
correlation([],_) ->
0.0;
correlation(_,[]) ->
0.0;
correlation(Xs,Ys) ->
covariance(Xs,Ys) /(stdev_sample(Xs)*stdev_sample(Ys)).
correlation(Xs,Ys) when is_list(Xs) and is_list(Ys) ->
Pairs = lists:zip(Xs,Ys),
case length(Pairs) of
N when N < 2 -> 0.0;
_ ->
X1 = [X || {X,_} <- Pairs],
Y1 = [Y || {_,Y} <- Pairs],
StdX = stdev_sample(X1),
StdY = stdev_sample(Y1),
case StdX =:= 0.0 orelse StdY =:= 0.0 of
true -> 0.0;
false ->
Cov = covariance(X1,Y1),
Cov/(StdX*StdY)
end
end.

-spec correlation_matrix(ListOfLists :: [] | [[float()|integer()]]) -> [{integer(),integer(),float()}].
%% @doc Correlate each list in `ListOfLists` with the others.
correlation_matrix([]) ->
[];
correlation_matrix(ListOfLists) ->
Expand All @@ -122,12 +162,14 @@ correlation_matrix(ListOfLists) ->
end,R))).

-spec average(Data :: [] | [float()|integer()]) -> float().
%% @doc Arithmetic mean of the list.
average([]) ->
0.0;
average(Data) ->
sum(Data)/length(Data).

-spec sum(Data :: [] | [float()|integer()]) -> float()|integer().
%% @doc Sum of all elements in `Data`.
sum([]) ->
0.0;
sum(Data) ->
Expand Down
41 changes: 41 additions & 0 deletions test/mathex_tests.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-module(mathex_tests).
-include_lib("eunit/include/eunit.hrl").

moving_average_test() ->
?assertEqual([1,1.5,2.0], mathex:moving_average([1,2,3])).

average_test() ->
?assertEqual(2.0, mathex:average([1,2,3])).

sum_test() ->
?assertEqual(6, mathex:sum([1,2,3])).

stdev_sample_test() ->
R = mathex:stdev_sample([2,4,4,4,5,5,7,9]),
?assert(abs(R - 2.138089935299395) < 1.0e-6).

stdev_population_test() ->
R = mathex:stdev_population([2,4,4,4,5,5,7,9]),
?assert(abs(R - 2.0) < 1.0e-6).

variance_test() ->
R = mathex:variance([1,2,3,4]),
?assert(abs(R - 1.25) < 1.0e-6).

covariance_test() ->
R = mathex:covariance([2.1,2.5,4.0,3.6],[8,12,14,10]),
?assert(abs(R - 1.5333333333333332) < 1.0e-6).

correlation_test() ->
R = mathex:correlation([2.1,2.5,4.0,3.6],[8,12,14,10]),
?assert(abs(R - 0.6625738822030289) < 1.0e-6).

correlation_matrix_test() ->
R = mathex:correlation_matrix([[1,2,3],[2,3,4]]),
?assertEqual([{1,2,1.0},{2,1,1.0}], R).

%% edge case tests
single_element_safe_test() ->
?assertEqual(0.0, mathex:stdev_sample([1])),
?assertEqual(0.0, mathex:covariance([1],[1])),
?assertEqual(0.0, mathex:correlation([1],[1])).