Skip to content

Commit 98a1a40

Browse files
committed
Improve performance of unweighted ecdf
1 parent 92d1674 commit 98a1a40

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "StatsBase"
22
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
33
authors = ["JuliaStats"]
4-
version = "0.34.6"
4+
version = "0.34.7"
55

66
[deps]
77
AliasTables = "66dad0bd-aa9a-41b7-9441-69ab47430ed8"

src/empirical.jl

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ function (ecdf::ECDF)(v::AbstractVector{<:Real})
4242
end
4343

4444
"""
45-
ecdf(X; weights::AbstractWeights)
45+
ecdf(X[; weights::AbstractVector{<:Real}])
4646
4747
Return an empirical cumulative distribution function (ECDF) based on a vector of samples
4848
given in `X`. Optionally providing `weights` returns a weighted ECDF.
@@ -53,12 +53,17 @@ evaluate CDF values on other samples.
5353
`extrema`, `minimum`, and `maximum` are supported for obtaining the range over which
5454
the function is inside the interval ``(0,1)``; the function is defined for the whole real line.
5555
"""
56-
function ecdf(X::AbstractVector{<:Real}; weights::AbstractVector{<:Real}=Weights(Float64[]))
56+
function ecdf(X::AbstractVector{<:Real}; weights::AbstractVector{<:Real}=weights(Float64[]))
5757
any(isnan, X) && throw(ArgumentError("ecdf can not include NaN values"))
58-
isempty(weights) || length(X) == length(weights) || throw(ArgumentError("data and weight vectors must be the same size," *
59-
"got $(length(X)) and $(length(weights))"))
60-
ord = sortperm(X)
61-
ECDF(X[ord], isempty(weights) ? weights : Weights(weights[ord]))
58+
_weights = weights isa AbstractWeights ? weights : StatsBase.weights(weights)
59+
if isempty(_weights)
60+
return ECDF(sort(X), _weights)
61+
else
62+
length(X) == length(_weights) || throw(ArgumentError("data and weight vectors must be the same size," *
63+
"got $(length(X)) and $(length(_weights))"))
64+
ord = sortperm(X)
65+
ECDF(X[ord], _weights[ord])
66+
end
6267
end
6368

6469
minimum(ecdf::ECDF) = first(ecdf.sorted_values)

0 commit comments

Comments
 (0)