@@ -421,36 +421,101 @@ realXcY(x::Real, y::Real) = x*y
421421realXcY (x:: Complex , y:: Complex ) = real (x)* real (y) + imag (x)* imag (y)
422422
423423"""
424- sem(x)
424+ sem(x; mean=nothing)
425+ sem(x::AbstractArray[, weights::AbstractWeights]; mean=nothing)
425426
426- Return the standard error of the mean of collection `x`,
427- i.e. `sqrt(var(x, corrected=true) / length(x))`.
427+ Return the standard error of the mean for a collection `x`.
428+ A pre-computed `mean` may be provided.
429+
430+ When not using weights, this is the (sample) standard deviation
431+ divided by the sample size. If weights are used, the
432+ variance of the sample mean is calculated as follows:
433+
434+ * `AnalyticWeights`: Not implemented.
435+ * `FrequencyWeights`: ``\\ frac{\\ sum_{i=1}^n w_i (x_i - \\ bar{x_i})^2}{(\\ sum w_i) (\\ sum w_i - 1)}``
436+ * `ProbabilityWeights`: ``\\ frac{n}{n-1} \\ frac{\\ sum_{i=1}^n w_i^2 (x_i - \\ bar{x_i})^2}{\\ left( \\ sum w_i \\ right)^2}``
437+
438+ The standard error is then the square root of the above quantities.
439+
440+ # References
441+
442+ Carl-Erik Särndal, Bengt Swensson, Jan Wretman (1992). Model Assisted Survey Sampling.
443+ New York: Springer. pp. 51-53.
428444"""
429- function sem (x)
430- y = iterate (x)
431- if y === nothing
445+ function sem (x; mean = nothing )
446+ if isempty (x)
447+ # Return the NaN of the type that we would get for a nonempty x
432448 T = eltype (x)
433- # Return the NaN of the type that we would get, had this collection
434- # contained any elements (this is consistent with std)
435- return oftype (sqrt ((abs2 (zero (T)) + abs2 (zero (T)))/ 2 ), NaN )
436- end
437- count = 1
438- value, state = y
439- y = iterate (x, state)
440- # Use Welford algorithm as seen in (among other places)
441- # Knuth's TAOCP, Vol 2, page 232, 3rd edition.
442- M = value / 1
443- S = real (zero (M))
444- while y != = nothing
449+ _mean = mean === nothing ? zero (T) / 1 : mean
450+ z = abs2 (zero (T) - _mean)
451+ return oftype ((z + z) / 2 , NaN )
452+ elseif mean === nothing
453+ n = 0
454+ y = iterate (x)
455+ value, state = y
456+ # Use Welford algorithm as seen in (among other places)
457+ # Knuth's TAOCP, Vol 2, page 232, 3rd edition.
458+ _mean = value / 1
459+ sse = real (zero (_mean))
460+ while y != = nothing
461+ value, state = y
462+ y = iterate (x, state)
463+ n += 1
464+ new_mean = _mean + (value - _mean) / n
465+ sse += realXcY (value - _mean, value - new_mean)
466+ _mean = new_mean
467+ end
468+ else
469+ n = 1
470+ y = iterate (x)
445471 value, state = y
446- y = iterate (x, state)
447- count += 1
448- new_M = M + (value - M) / count
449- S = S + realXcY (value - M, value - new_M)
450- M = new_M
472+ sse = abs2 (value - mean)
473+ while (y = iterate (x, state)) != = nothing
474+ value, state = y
475+ n += 1
476+ sse += abs2 (value - mean)
477+ end
478+ end
479+ variance = sse / (n - 1 )
480+ return sqrt (variance / n)
481+ end
482+
483+ function sem (x:: AbstractArray ; mean= nothing )
484+ if isempty (x)
485+ # Return the NaN of the type that we would get for a nonempty x
486+ T = eltype (x)
487+ _mean = mean === nothing ? zero (T) / 1 : mean
488+ z = abs2 (zero (T) - _mean)
489+ return oftype ((z + z) / 2 , NaN )
490+ end
491+ return sqrt (var (x; mean= mean, corrected= true ) / length (x))
492+ end
493+
494+ function sem (x:: AbstractArray , weights:: UnitWeights ; mean= nothing )
495+ if length (x) ≠ length (weights)
496+ throw (DimensionMismatch (" array and weights do not have the same length" ))
497+ end
498+ return sem (x; mean= mean)
499+ end
500+
501+
502+ # Weighted methods for the above
503+ sem (x:: AbstractArray , weights:: FrequencyWeights ; mean= nothing ) =
504+ sqrt (var (x, weights; mean= mean, corrected= true ) / sum (weights))
505+
506+ function sem (x:: AbstractArray , weights:: ProbabilityWeights ; mean= nothing )
507+ if isempty (x)
508+ # Return the NaN of the type that we would get for a nonempty x
509+ return var (x, weights; mean= mean, corrected= true ) / 0
510+ else
511+ _mean = mean === nothing ? Statistics. mean (x, weights) : mean
512+ # sum of squared errors = sse
513+ sse = sum (Broadcast. instantiate (Broadcast. broadcasted (x, weights) do x_i, w
514+ return abs2 (w * (x_i - _mean))
515+ end ))
516+ n = count (! iszero, weights)
517+ return sqrt (sse * n / (n - 1 )) / sum (weights)
451518 end
452- var = S / (count - 1 )
453- return sqrt (var/ count)
454519end
455520
456521# Median absolute deviation
0 commit comments