From 62dc3eccf37f70e7af10fc84178a52ca1384b232 Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Wed, 14 May 2025 18:06:37 +0200
Subject: [PATCH 1/6] [AA][HI]: perfprof creator: autosizing implementation

Implement reserved CPU (aka infra + control plane) sizing using linear
programming optimization (gonum/optimize). The core idea is to model the
constraints and let the optimization package compute the desired target.

These changes were AI-assisted (hence the AA tag), then largely amended
by a human (hence the HI tag - Human Intervention). The initial penalty
cost structure was suggested by Google Gemini 2.5 Flash, and then
amended by human intervention.

Assisted-by: Google Gemini
Assisted-by-model: gemini-2.5-flash
Signed-off-by: Francesco Romani
---
 go.mod | 1 + go.sum | 2 + .../profilecreator/autosize/autosize.go | 246 ++ .../profilecreator/cmd/root.go | 37 +- .../x/tools/container/intsets/sparse.go | 1107 ++++++ vendor/gonum.org/v1/gonum/AUTHORS | 141 + vendor/gonum.org/v1/gonum/CONTRIBUTORS | 144 + vendor/gonum.org/v1/gonum/LICENSE | 23 + vendor/gonum.org/v1/gonum/blas/README.md | 51 + vendor/gonum.org/v1/gonum/blas/blas.go | 283 ++ .../gonum.org/v1/gonum/blas/blas64/blas64.go | 533 +++ vendor/gonum.org/v1/gonum/blas/blas64/conv.go | 263 ++ .../v1/gonum/blas/blas64/conv_symmetric.go | 153 + vendor/gonum.org/v1/gonum/blas/blas64/doc.go | 6 + .../v1/gonum/blas/cblas128/cblas128.go | 600 ++++ .../gonum.org/v1/gonum/blas/cblas128/conv.go | 265 ++ .../v1/gonum/blas/cblas128/conv_hermitian.go | 155 + .../v1/gonum/blas/cblas128/conv_symmetric.go | 155 + .../gonum.org/v1/gonum/blas/cblas128/doc.go | 6 + .../gonum.org/v1/gonum/blas/conversions.bash | 159 + vendor/gonum.org/v1/gonum/blas/doc.go | 108 + vendor/gonum.org/v1/gonum/blas/gonum/dgemm.go | 297 ++ vendor/gonum.org/v1/gonum/blas/gonum/doc.go | 99 + .../gonum.org/v1/gonum/blas/gonum/errors.go | 35 + vendor/gonum.org/v1/gonum/blas/gonum/gonum.go | 38 + .../v1/gonum/blas/gonum/level1cmplx128.go | 454 +++ .../v1/gonum/blas/gonum/level1cmplx64.go | 476 +++ .../v1/gonum/blas/gonum/level1float32.go | 653 ++++ .../gonum/blas/gonum/level1float32_dsdot.go | 54 + .../v1/gonum/blas/gonum/level1float32_sdot.go | 54 + .../gonum/blas/gonum/level1float32_sdsdot.go | 54 + .../v1/gonum/blas/gonum/level1float64.go | 629 ++++ .../v1/gonum/blas/gonum/level1float64_ddot.go | 50 + .../v1/gonum/blas/gonum/level2cmplx128.go | 2940 ++++++++++++++++ .../v1/gonum/blas/gonum/level2cmplx64.go | 2976 +++++++++++++++++ .../v1/gonum/blas/gonum/level2float32.go | 2400 +++++++++++++ .../v1/gonum/blas/gonum/level2float64.go | 2366 +++++++++++++ .../v1/gonum/blas/gonum/level3cmplx128.go | 1751 ++++++++++ .../v1/gonum/blas/gonum/level3cmplx64.go | 1771 ++++++++++ .../v1/gonum/blas/gonum/level3float32.go | 925 +++++ .../v1/gonum/blas/gonum/level3float64.go | 913 +++++ vendor/gonum.org/v1/gonum/blas/gonum/sgemm.go | 301 ++ .../v1/gonum/blas/gonum/single_precision.bash | 224 ++ vendor/gonum.org/v1/gonum/floats/README.md | 7 + vendor/gonum.org/v1/gonum/floats/doc.go | 11 + vendor/gonum.org/v1/gonum/floats/floats.go | 808 +++++ .../gonum.org/v1/gonum/floats/scalar/doc.go | 6 + .../v1/gonum/floats/scalar/scalar.go | 171 + .../gonum/internal/asm/c128/axpyinc_amd64.s | 134 + .../gonum/internal/asm/c128/axpyincto_amd64.s | 141 + .../internal/asm/c128/axpyunitary_amd64.s | 122 + .../internal/asm/c128/axpyunitaryto_amd64.s | 123 + .../v1/gonum/internal/asm/c128/doc.go | 6 + .../gonum/internal/asm/c128/dotcinc_amd64.s | 153 + .../internal/asm/c128/dotcunitary_amd64.s | 143 +
.../gonum/internal/asm/c128/dotuinc_amd64.s | 141 + .../internal/asm/c128/dotuunitary_amd64.s | 130 + .../gonum/internal/asm/c128/dscalinc_amd64.s | 69 + .../internal/asm/c128/dscalunitary_amd64.s | 66 + .../v1/gonum/internal/asm/c128/scal.go | 33 + .../internal/asm/c128/scalUnitary_amd64.s | 116 + .../gonum/internal/asm/c128/scalinc_amd64.s | 121 + .../v1/gonum/internal/asm/c128/stubs.go | 180 + .../v1/gonum/internal/asm/c128/stubs_amd64.go | 109 + .../v1/gonum/internal/asm/c128/stubs_noasm.go | 176 + .../v1/gonum/internal/asm/c64/axpyinc_amd64.s | 151 + .../gonum/internal/asm/c64/axpyincto_amd64.s | 156 + .../internal/asm/c64/axpyunitary_amd64.s | 160 + .../internal/asm/c64/axpyunitaryto_amd64.s | 157 + .../v1/gonum/internal/asm/c64/conj.go | 7 + .../v1/gonum/internal/asm/c64/doc.go | 6 + .../v1/gonum/internal/asm/c64/dotcinc_amd64.s | 160 + .../internal/asm/c64/dotcunitary_amd64.s | 208 ++ .../v1/gonum/internal/asm/c64/dotuinc_amd64.s | 148 + .../internal/asm/c64/dotuunitary_amd64.s | 197 ++ .../v1/gonum/internal/asm/c64/scal.go | 85 + .../v1/gonum/internal/asm/c64/stubs.go | 180 + .../v1/gonum/internal/asm/c64/stubs_amd64.go | 77 + .../v1/gonum/internal/asm/c64/stubs_noasm.go | 122 + .../v1/gonum/internal/asm/f32/axpyinc_amd64.s | 73 + .../gonum/internal/asm/f32/axpyincto_amd64.s | 78 + .../internal/asm/f32/axpyunitary_amd64.s | 97 + .../internal/asm/f32/axpyunitaryto_amd64.s | 98 + .../v1/gonum/internal/asm/f32/ddotinc_amd64.s | 91 + .../internal/asm/f32/ddotunitary_amd64.s | 110 + .../v1/gonum/internal/asm/f32/doc.go | 6 + .../v1/gonum/internal/asm/f32/dotinc_amd64.s | 85 + .../gonum/internal/asm/f32/dotunitary_amd64.s | 106 + .../v1/gonum/internal/asm/f32/ge_amd64.go | 18 + .../v1/gonum/internal/asm/f32/ge_amd64.s | 757 +++++ .../v1/gonum/internal/asm/f32/ge_noasm.go | 39 + .../v1/gonum/internal/asm/f32/gemv.go | 92 + .../v1/gonum/internal/asm/f32/l2norm.go | 90 + .../v1/gonum/internal/asm/f32/scal.go | 59 + .../v1/gonum/internal/asm/f32/stubs_amd64.go | 86 + .../v1/gonum/internal/asm/f32/stubs_noasm.go | 137 + .../v1/gonum/internal/asm/f32/sum_amd64.s | 100 + .../v1/gonum/internal/asm/f64/abssum_amd64.s | 82 + .../gonum/internal/asm/f64/abssuminc_amd64.s | 90 + .../v1/gonum/internal/asm/f64/add_amd64.s | 66 + .../gonum/internal/asm/f64/addconst_amd64.s | 53 + .../v1/gonum/internal/asm/f64/axpy.go | 62 + .../v1/gonum/internal/asm/f64/axpyinc_amd64.s | 142 + .../gonum/internal/asm/f64/axpyincto_amd64.s | 148 + .../internal/asm/f64/axpyunitary_amd64.s | 134 + .../internal/asm/f64/axpyunitaryto_amd64.s | 140 + .../v1/gonum/internal/asm/f64/cumprod_amd64.s | 71 + .../v1/gonum/internal/asm/f64/cumsum_amd64.s | 64 + .../v1/gonum/internal/asm/f64/div_amd64.s | 67 + .../v1/gonum/internal/asm/f64/divto_amd64.s | 73 + .../v1/gonum/internal/asm/f64/doc.go | 6 + .../v1/gonum/internal/asm/f64/dot.go | 38 + .../v1/gonum/internal/asm/f64/dot_amd64.s | 145 + .../v1/gonum/internal/asm/f64/ge_amd64.go | 29 + .../v1/gonum/internal/asm/f64/ge_noasm.go | 125 + .../v1/gonum/internal/asm/f64/gemvN_amd64.s | 685 ++++ .../v1/gonum/internal/asm/f64/gemvT_amd64.s | 745 +++++ .../v1/gonum/internal/asm/f64/ger_amd64.s | 591 ++++ .../v1/gonum/internal/asm/f64/l1norm_amd64.s | 58 + .../v1/gonum/internal/asm/f64/l2norm_amd64.s | 109 + .../v1/gonum/internal/asm/f64/l2norm_noasm.go | 93 + .../gonum/internal/asm/f64/l2normdist_amd64.s | 115 + .../gonum/internal/asm/f64/l2norminc_amd64.s | 110 + .../gonum/internal/asm/f64/linfnorm_amd64.s | 57 + .../v1/gonum/internal/asm/f64/scal.go | 62 + 
.../v1/gonum/internal/asm/f64/scalinc_amd64.s | 113 + .../gonum/internal/asm/f64/scalincto_amd64.s | 122 + .../internal/asm/f64/scalunitary_amd64.s | 112 + .../internal/asm/f64/scalunitaryto_amd64.s | 113 + .../v1/gonum/internal/asm/f64/stubs_amd64.go | 277 ++ .../v1/gonum/internal/asm/f64/stubs_noasm.go | 182 + .../v1/gonum/internal/asm/f64/sum_amd64.s | 99 + .../v1/gonum/internal/cmplx64/abs.go | 14 + .../v1/gonum/internal/cmplx64/conj.go | 12 + .../v1/gonum/internal/cmplx64/doc.go | 7 + .../v1/gonum/internal/cmplx64/isinf.go | 25 + .../v1/gonum/internal/cmplx64/isnan.go | 29 + .../v1/gonum/internal/cmplx64/sqrt.go | 108 + .../gonum.org/v1/gonum/internal/math32/doc.go | 7 + .../v1/gonum/internal/math32/math.go | 166 + .../v1/gonum/internal/math32/signbit.go | 16 + .../v1/gonum/internal/math32/sqrt.go | 26 + .../v1/gonum/internal/math32/sqrt_amd64.go | 22 + .../v1/gonum/internal/math32/sqrt_amd64.s | 17 + .../v1/gonum/internal/math32/sqrt_arm64.go | 22 + .../v1/gonum/internal/math32/sqrt_arm64.s | 18 + vendor/gonum.org/v1/gonum/lapack/.gitignore | 0 vendor/gonum.org/v1/gonum/lapack/README.md | 29 + vendor/gonum.org/v1/gonum/lapack/doc.go | 6 + .../gonum.org/v1/gonum/lapack/gonum/dbdsqr.go | 506 +++ .../gonum.org/v1/gonum/lapack/gonum/dgebak.go | 91 + .../gonum.org/v1/gonum/lapack/gonum/dgebal.go | 248 ++ .../gonum.org/v1/gonum/lapack/gonum/dgebd2.go | 88 + .../gonum.org/v1/gonum/lapack/gonum/dgebrd.go | 169 + .../gonum.org/v1/gonum/lapack/gonum/dgecon.go | 106 + .../gonum.org/v1/gonum/lapack/gonum/dgeev.go | 287 ++ .../gonum.org/v1/gonum/lapack/gonum/dgehd2.go | 105 + .../gonum.org/v1/gonum/lapack/gonum/dgehrd.go | 202 ++ .../gonum.org/v1/gonum/lapack/gonum/dgelq2.go | 65 + .../gonum.org/v1/gonum/lapack/gonum/dgelqf.go | 97 + .../gonum.org/v1/gonum/lapack/gonum/dgels.go | 220 ++ .../gonum.org/v1/gonum/lapack/gonum/dgeql2.go | 67 + .../gonum.org/v1/gonum/lapack/gonum/dgeqp3.go | 195 ++ .../gonum.org/v1/gonum/lapack/gonum/dgeqr2.go | 78 + .../gonum.org/v1/gonum/lapack/gonum/dgeqrf.go | 108 + .../gonum.org/v1/gonum/lapack/gonum/dgerq2.go | 74 + .../gonum.org/v1/gonum/lapack/gonum/dgerqf.go | 135 + .../gonum.org/v1/gonum/lapack/gonum/dgesc2.go | 93 + .../gonum.org/v1/gonum/lapack/gonum/dgesv.go | 60 + .../gonum.org/v1/gonum/lapack/gonum/dgesvd.go | 1378 ++++++++ .../gonum.org/v1/gonum/lapack/gonum/dgetc2.go | 125 + .../gonum.org/v1/gonum/lapack/gonum/dgetf2.go | 90 + .../gonum.org/v1/gonum/lapack/gonum/dgetrf.go | 89 + .../gonum.org/v1/gonum/lapack/gonum/dgetri.go | 116 + .../gonum.org/v1/gonum/lapack/gonum/dgetrs.go | 74 + .../gonum.org/v1/gonum/lapack/gonum/dgghrd.go | 125 + .../v1/gonum/lapack/gonum/dggsvd3.go | 258 ++ .../v1/gonum/lapack/gonum/dggsvp3.go | 286 ++ .../gonum.org/v1/gonum/lapack/gonum/dgtsv.go | 101 + .../gonum.org/v1/gonum/lapack/gonum/dhseqr.go | 272 ++ .../gonum.org/v1/gonum/lapack/gonum/dlabrd.go | 183 + .../gonum.org/v1/gonum/lapack/gonum/dlacn2.go | 136 + .../gonum.org/v1/gonum/lapack/gonum/dlacpy.go | 59 + .../gonum.org/v1/gonum/lapack/gonum/dlae2.go | 51 + .../gonum.org/v1/gonum/lapack/gonum/dlaev2.go | 85 + .../gonum.org/v1/gonum/lapack/gonum/dlaexc.go | 269 ++ .../gonum.org/v1/gonum/lapack/gonum/dlag2.go | 237 ++ .../gonum.org/v1/gonum/lapack/gonum/dlags2.go | 186 ++ .../gonum.org/v1/gonum/lapack/gonum/dlagtm.go | 111 + .../gonum.org/v1/gonum/lapack/gonum/dlahqr.go | 449 +++ .../gonum.org/v1/gonum/lapack/gonum/dlahr2.go | 202 ++ .../gonum.org/v1/gonum/lapack/gonum/dlaln2.go | 407 +++ .../gonum.org/v1/gonum/lapack/gonum/dlangb.go | 87 + 
.../gonum.org/v1/gonum/lapack/gonum/dlange.go | 89 + .../gonum.org/v1/gonum/lapack/gonum/dlangt.go | 115 + .../gonum.org/v1/gonum/lapack/gonum/dlanhs.go | 78 + .../gonum.org/v1/gonum/lapack/gonum/dlansb.go | 131 + .../gonum.org/v1/gonum/lapack/gonum/dlanst.go | 75 + .../gonum.org/v1/gonum/lapack/gonum/dlansy.go | 125 + .../gonum.org/v1/gonum/lapack/gonum/dlantb.go | 209 ++ .../gonum.org/v1/gonum/lapack/gonum/dlantr.go | 252 ++ .../gonum.org/v1/gonum/lapack/gonum/dlanv2.go | 151 + .../gonum.org/v1/gonum/lapack/gonum/dlapll.go | 55 + .../gonum.org/v1/gonum/lapack/gonum/dlapmr.go | 88 + .../gonum.org/v1/gonum/lapack/gonum/dlapmt.go | 89 + .../gonum.org/v1/gonum/lapack/gonum/dlapy2.go | 14 + .../gonum.org/v1/gonum/lapack/gonum/dlaqp2.go | 127 + .../gonum.org/v1/gonum/lapack/gonum/dlaqps.go | 244 ++ .../v1/gonum/lapack/gonum/dlaqr04.go | 493 +++ .../gonum.org/v1/gonum/lapack/gonum/dlaqr1.go | 61 + .../v1/gonum/lapack/gonum/dlaqr23.go | 423 +++ .../gonum.org/v1/gonum/lapack/gonum/dlaqr5.go | 560 ++++ .../gonum.org/v1/gonum/lapack/gonum/dlarf.go | 102 + .../gonum.org/v1/gonum/lapack/gonum/dlarfb.go | 461 +++ .../gonum.org/v1/gonum/lapack/gonum/dlarfg.go | 75 + .../gonum.org/v1/gonum/lapack/gonum/dlarft.go | 169 + .../gonum.org/v1/gonum/lapack/gonum/dlarfx.go | 552 +++ .../gonum.org/v1/gonum/lapack/gonum/dlartg.go | 73 + .../gonum.org/v1/gonum/lapack/gonum/dlas2.go | 45 + .../gonum.org/v1/gonum/lapack/gonum/dlascl.go | 111 + .../gonum.org/v1/gonum/lapack/gonum/dlaset.go | 58 + .../gonum.org/v1/gonum/lapack/gonum/dlasq1.go | 100 + .../gonum.org/v1/gonum/lapack/gonum/dlasq2.go | 370 ++ .../gonum.org/v1/gonum/lapack/gonum/dlasq3.go | 172 + .../gonum.org/v1/gonum/lapack/gonum/dlasq4.go | 249 ++ .../gonum.org/v1/gonum/lapack/gonum/dlasq5.go | 140 + .../gonum.org/v1/gonum/lapack/gonum/dlasq6.go | 118 + .../gonum.org/v1/gonum/lapack/gonum/dlasr.go | 287 ++ .../gonum.org/v1/gonum/lapack/gonum/dlasrt.go | 36 + .../gonum.org/v1/gonum/lapack/gonum/dlassq.go | 131 + .../gonum.org/v1/gonum/lapack/gonum/dlasv2.go | 117 + .../gonum.org/v1/gonum/lapack/gonum/dlaswp.go | 58 + .../gonum.org/v1/gonum/lapack/gonum/dlasy2.go | 292 ++ .../gonum.org/v1/gonum/lapack/gonum/dlatbs.go | 454 +++ .../gonum.org/v1/gonum/lapack/gonum/dlatdf.go | 175 + .../gonum.org/v1/gonum/lapack/gonum/dlatrd.go | 176 + .../gonum.org/v1/gonum/lapack/gonum/dlatrs.go | 410 +++ .../gonum.org/v1/gonum/lapack/gonum/dlauu2.go | 66 + .../gonum.org/v1/gonum/lapack/gonum/dlauum.go | 83 + vendor/gonum.org/v1/gonum/lapack/gonum/doc.go | 28 + .../gonum.org/v1/gonum/lapack/gonum/dorg2l.go | 78 + .../gonum.org/v1/gonum/lapack/gonum/dorg2r.go | 77 + .../gonum.org/v1/gonum/lapack/gonum/dorgbr.go | 138 + .../gonum.org/v1/gonum/lapack/gonum/dorghr.go | 103 + .../gonum.org/v1/gonum/lapack/gonum/dorgl2.go | 79 + .../gonum.org/v1/gonum/lapack/gonum/dorglq.go | 125 + .../gonum.org/v1/gonum/lapack/gonum/dorgql.go | 139 + .../gonum.org/v1/gonum/lapack/gonum/dorgqr.go | 136 + .../gonum.org/v1/gonum/lapack/gonum/dorgr2.go | 83 + .../gonum.org/v1/gonum/lapack/gonum/dorgtr.go | 106 + .../gonum.org/v1/gonum/lapack/gonum/dorm2r.go | 103 + .../gonum.org/v1/gonum/lapack/gonum/dormbr.go | 180 + .../gonum.org/v1/gonum/lapack/gonum/dormhr.go | 134 + .../gonum.org/v1/gonum/lapack/gonum/dorml2.go | 104 + .../gonum.org/v1/gonum/lapack/gonum/dormlq.go | 176 + .../gonum.org/v1/gonum/lapack/gonum/dormqr.go | 180 + .../gonum.org/v1/gonum/lapack/gonum/dormr2.go | 105 + .../gonum.org/v1/gonum/lapack/gonum/dpbcon.go | 111 + .../gonum.org/v1/gonum/lapack/gonum/dpbtf2.go | 114 + 
.../gonum.org/v1/gonum/lapack/gonum/dpbtrf.go | 216 ++ .../gonum.org/v1/gonum/lapack/gonum/dpbtrs.go | 69 + .../gonum.org/v1/gonum/lapack/gonum/dpocon.go | 90 + .../gonum.org/v1/gonum/lapack/gonum/dpotf2.go | 82 + .../gonum.org/v1/gonum/lapack/gonum/dpotrf.go | 81 + .../gonum.org/v1/gonum/lapack/gonum/dpotri.go | 44 + .../gonum.org/v1/gonum/lapack/gonum/dpotrs.go | 64 + .../gonum.org/v1/gonum/lapack/gonum/dpstf2.go | 202 ++ .../gonum.org/v1/gonum/lapack/gonum/dpstrf.go | 233 ++ .../gonum.org/v1/gonum/lapack/gonum/dptcon.go | 99 + .../gonum.org/v1/gonum/lapack/gonum/dptsv.go | 49 + .../gonum.org/v1/gonum/lapack/gonum/dpttrf.go | 80 + .../gonum.org/v1/gonum/lapack/gonum/dpttrs.go | 51 + .../gonum.org/v1/gonum/lapack/gonum/dptts2.go | 39 + .../gonum.org/v1/gonum/lapack/gonum/drscl.go | 63 + .../gonum.org/v1/gonum/lapack/gonum/dsteqr.go | 376 +++ .../gonum.org/v1/gonum/lapack/gonum/dsterf.go | 285 ++ .../gonum.org/v1/gonum/lapack/gonum/dsyev.go | 130 + .../gonum.org/v1/gonum/lapack/gonum/dsytd2.go | 147 + .../gonum.org/v1/gonum/lapack/gonum/dsytrd.go | 184 + .../gonum.org/v1/gonum/lapack/gonum/dtbtrs.go | 77 + .../gonum.org/v1/gonum/lapack/gonum/dtgsja.go | 389 +++ .../gonum.org/v1/gonum/lapack/gonum/dtrcon.go | 90 + .../v1/gonum/lapack/gonum/dtrevc3.go | 894 +++++ .../gonum.org/v1/gonum/lapack/gonum/dtrexc.go | 230 ++ .../gonum.org/v1/gonum/lapack/gonum/dtrti2.go | 69 + .../gonum.org/v1/gonum/lapack/gonum/dtrtri.go | 72 + .../gonum.org/v1/gonum/lapack/gonum/dtrtrs.go | 55 + .../gonum.org/v1/gonum/lapack/gonum/errors.go | 183 + .../gonum.org/v1/gonum/lapack/gonum/iladlc.go | 45 + .../gonum.org/v1/gonum/lapack/gonum/iladlr.go | 41 + .../gonum.org/v1/gonum/lapack/gonum/ilaenv.go | 395 +++ .../gonum.org/v1/gonum/lapack/gonum/iparmq.go | 117 + .../gonum.org/v1/gonum/lapack/gonum/lapack.go | 64 + vendor/gonum.org/v1/gonum/lapack/lapack.go | 240 ++ .../gonum.org/v1/gonum/lapack/lapack64/doc.go | 20 + .../v1/gonum/lapack/lapack64/lapack64.go | 908 +++++ vendor/gonum.org/v1/gonum/mat/README.md | 6 + vendor/gonum.org/v1/gonum/mat/band.go | 368 ++ vendor/gonum.org/v1/gonum/mat/cdense.go | 368 ++ vendor/gonum.org/v1/gonum/mat/cholesky.go | 1203 +++++++ vendor/gonum.org/v1/gonum/mat/cmatrix.go | 314 ++ vendor/gonum.org/v1/gonum/mat/consts.go | 15 + vendor/gonum.org/v1/gonum/mat/dense.go | 670 ++++ .../v1/gonum/mat/dense_arithmetic.go | 877 +++++ vendor/gonum.org/v1/gonum/mat/diagonal.go | 342 ++ vendor/gonum.org/v1/gonum/mat/doc.go | 200 ++ vendor/gonum.org/v1/gonum/mat/eigen.go | 450 +++ vendor/gonum.org/v1/gonum/mat/errors.go | 154 + vendor/gonum.org/v1/gonum/mat/format.go | 516 +++ vendor/gonum.org/v1/gonum/mat/gsvd.go | 436 +++ vendor/gonum.org/v1/gonum/mat/hogsvd.go | 239 ++ .../v1/gonum/mat/index_bound_checks.go | 398 +++ .../v1/gonum/mat/index_no_bound_checks.go | 400 +++ vendor/gonum.org/v1/gonum/mat/inner.go | 126 + vendor/gonum.org/v1/gonum/mat/io.go | 495 +++ vendor/gonum.org/v1/gonum/mat/lq.go | 305 ++ vendor/gonum.org/v1/gonum/mat/lu.go | 487 +++ vendor/gonum.org/v1/gonum/mat/matrix.go | 1000 ++++++ vendor/gonum.org/v1/gonum/mat/offset.go | 32 + .../v1/gonum/mat/offset_appengine.go | 40 + vendor/gonum.org/v1/gonum/mat/pool.go | 260 ++ vendor/gonum.org/v1/gonum/mat/product.go | 193 ++ vendor/gonum.org/v1/gonum/mat/qr.go | 349 ++ vendor/gonum.org/v1/gonum/mat/shadow.go | 243 ++ .../gonum.org/v1/gonum/mat/shadow_common.go | 54 + .../gonum.org/v1/gonum/mat/shadow_complex.go | 72 + vendor/gonum.org/v1/gonum/mat/solve.go | 124 + vendor/gonum.org/v1/gonum/mat/svd.go | 425 +++ 
vendor/gonum.org/v1/gonum/mat/symband.go | 312 ++ vendor/gonum.org/v1/gonum/mat/symmetric.go | 698 ++++ vendor/gonum.org/v1/gonum/mat/triangular.go | 832 +++++ vendor/gonum.org/v1/gonum/mat/triband.go | 694 ++++ vendor/gonum.org/v1/gonum/mat/tridiag.go | 417 +++ vendor/gonum.org/v1/gonum/mat/vector.go | 855 +++++ vendor/gonum.org/v1/gonum/mathext/README.md | 6 + vendor/gonum.org/v1/gonum/mathext/airy.go | 41 + vendor/gonum.org/v1/gonum/mathext/beta.go | 40 + vendor/gonum.org/v1/gonum/mathext/betainc.go | 33 + vendor/gonum.org/v1/gonum/mathext/digamma.go | 45 + vendor/gonum.org/v1/gonum/mathext/doc.go | 7 + .../gonum.org/v1/gonum/mathext/ell_carlson.go | 168 + .../v1/gonum/mathext/ell_complete.go | 355 ++ vendor/gonum.org/v1/gonum/mathext/erf.go | 91 + .../gonum.org/v1/gonum/mathext/gamma_inc.go | 58 + .../v1/gonum/mathext/gamma_inc_inv.go | 58 + .../v1/gonum/mathext/internal/amos/amos.go | 2136 ++++++++++++ .../v1/gonum/mathext/internal/amos/doc.go | 6 + .../mathext/internal/amos/staticcheck.conf | 1 + .../gonum/mathext/internal/cephes/cephes.go | 28 + .../v1/gonum/mathext/internal/cephes/doc.go | 6 + .../v1/gonum/mathext/internal/cephes/igam.go | 320 ++ .../v1/gonum/mathext/internal/cephes/igami.go | 155 + .../gonum/mathext/internal/cephes/incbeta.go | 312 ++ .../v1/gonum/mathext/internal/cephes/incbi.go | 247 ++ .../gonum/mathext/internal/cephes/lanczos.go | 153 + .../v1/gonum/mathext/internal/cephes/ndtri.go | 150 + .../gonum/mathext/internal/cephes/polevl.go | 84 + .../mathext/internal/cephes/staticcheck.conf | 1 + .../v1/gonum/mathext/internal/cephes/unity.go | 184 + .../v1/gonum/mathext/internal/cephes/zeta.go | 117 + .../v1/gonum/mathext/internal/gonum/beta.go | 58 + .../v1/gonum/mathext/internal/gonum/doc.go | 7 + .../v1/gonum/mathext/internal/gonum/gonum.go | 5 + vendor/gonum.org/v1/gonum/mathext/mvgamma.go | 32 + vendor/gonum.org/v1/gonum/mathext/roots.go | 181 + vendor/gonum.org/v1/gonum/mathext/zeta.go | 22 + vendor/gonum.org/v1/gonum/optimize/README.md | 6 + .../v1/gonum/optimize/backtracking.go | 84 + vendor/gonum.org/v1/gonum/optimize/bfgs.go | 192 ++ .../gonum.org/v1/gonum/optimize/bisection.go | 146 + vendor/gonum.org/v1/gonum/optimize/cg.go | 368 ++ vendor/gonum.org/v1/gonum/optimize/cmaes.go | 468 +++ vendor/gonum.org/v1/gonum/optimize/doc.go | 6 + vendor/gonum.org/v1/gonum/optimize/errors.go | 78 + .../v1/gonum/optimize/functionconvergence.go | 85 + .../v1/gonum/optimize/gradientdescent.go | 95 + .../v1/gonum/optimize/guessandcheck.go | 92 + .../gonum.org/v1/gonum/optimize/interfaces.go | 132 + vendor/gonum.org/v1/gonum/optimize/lbfgs.go | 199 ++ .../gonum.org/v1/gonum/optimize/linesearch.go | 218 ++ .../gonum.org/v1/gonum/optimize/listsearch.go | 123 + vendor/gonum.org/v1/gonum/optimize/local.go | 146 + .../gonum.org/v1/gonum/optimize/minimize.go | 595 ++++ .../v1/gonum/optimize/morethuente.go | 387 +++ .../gonum.org/v1/gonum/optimize/neldermead.go | 348 ++ vendor/gonum.org/v1/gonum/optimize/newton.go | 182 + vendor/gonum.org/v1/gonum/optimize/printer.go | 108 + .../gonum.org/v1/gonum/optimize/stepsizers.go | 194 ++ .../v1/gonum/optimize/termination.go | 123 + vendor/gonum.org/v1/gonum/optimize/types.go | 273 ++ vendor/gonum.org/v1/gonum/spatial/r1/doc.go | 6 + .../gonum.org/v1/gonum/spatial/r1/interval.go | 10 + vendor/gonum.org/v1/gonum/stat/README.md | 6 + .../gonum.org/v1/gonum/stat/combin/combin.go | 683 ++++ vendor/gonum.org/v1/gonum/stat/combin/doc.go | 7 + .../v1/gonum/stat/distmv/dirichlet.go | 149 + .../gonum.org/v1/gonum/stat/distmv/distmv.go | 28 + 
vendor/gonum.org/v1/gonum/stat/distmv/doc.go | 6 + .../v1/gonum/stat/distmv/interfaces.go | 35 + .../gonum.org/v1/gonum/stat/distmv/normal.go | 524 +++ .../v1/gonum/stat/distmv/statdist.go | 390 +++ .../v1/gonum/stat/distmv/studentst.go | 362 ++ .../gonum.org/v1/gonum/stat/distmv/uniform.go | 200 ++ .../v1/gonum/stat/distuv/alphastable.go | 112 + .../v1/gonum/stat/distuv/bernoulli.go | 140 + vendor/gonum.org/v1/gonum/stat/distuv/beta.go | 151 + .../v1/gonum/stat/distuv/binomial.go | 189 ++ .../v1/gonum/stat/distuv/categorical.go | 184 + vendor/gonum.org/v1/gonum/stat/distuv/chi.go | 124 + .../v1/gonum/stat/distuv/chisquared.go | 101 + .../v1/gonum/stat/distuv/constants.go | 28 + vendor/gonum.org/v1/gonum/stat/distuv/doc.go | 6 + .../v1/gonum/stat/distuv/exponential.go | 266 ++ vendor/gonum.org/v1/gonum/stat/distuv/f.go | 134 + .../gonum.org/v1/gonum/stat/distuv/gamma.go | 203 ++ .../gonum.org/v1/gonum/stat/distuv/general.go | 24 + .../gonum.org/v1/gonum/stat/distuv/gumbel.go | 118 + .../v1/gonum/stat/distuv/interfaces.go | 32 + .../v1/gonum/stat/distuv/inversegamma.go | 123 + .../gonum.org/v1/gonum/stat/distuv/laplace.go | 267 ++ .../v1/gonum/stat/distuv/logistic.go | 98 + .../v1/gonum/stat/distuv/lognormal.go | 113 + vendor/gonum.org/v1/gonum/stat/distuv/norm.go | 263 ++ .../gonum.org/v1/gonum/stat/distuv/pareto.go | 130 + .../gonum.org/v1/gonum/stat/distuv/poisson.go | 144 + .../v1/gonum/stat/distuv/statdist.go | 142 + .../v1/gonum/stat/distuv/studentst.go | 161 + .../v1/gonum/stat/distuv/triangle.go | 278 ++ .../gonum.org/v1/gonum/stat/distuv/uniform.go | 210 ++ .../gonum.org/v1/gonum/stat/distuv/weibull.go | 231 ++ vendor/gonum.org/v1/gonum/stat/doc.go | 6 + vendor/gonum.org/v1/gonum/stat/pca_cca.go | 317 ++ vendor/gonum.org/v1/gonum/stat/roc.go | 198 ++ vendor/gonum.org/v1/gonum/stat/stat.go | 1400 ++++++++ vendor/gonum.org/v1/gonum/stat/statmat.go | 142 + vendor/modules.txt | 29 + 436 files changed, 95292 insertions(+), 5 deletions(-) create mode 100644 pkg/performanceprofile/profilecreator/autosize/autosize.go create mode 100644 vendor/golang.org/x/tools/container/intsets/sparse.go create mode 100644 vendor/gonum.org/v1/gonum/AUTHORS create mode 100644 vendor/gonum.org/v1/gonum/CONTRIBUTORS create mode 100644 vendor/gonum.org/v1/gonum/LICENSE create mode 100644 vendor/gonum.org/v1/gonum/blas/README.md create mode 100644 vendor/gonum.org/v1/gonum/blas/blas.go create mode 100644 vendor/gonum.org/v1/gonum/blas/blas64/blas64.go create mode 100644 vendor/gonum.org/v1/gonum/blas/blas64/conv.go create mode 100644 vendor/gonum.org/v1/gonum/blas/blas64/conv_symmetric.go create mode 100644 vendor/gonum.org/v1/gonum/blas/blas64/doc.go create mode 100644 vendor/gonum.org/v1/gonum/blas/cblas128/cblas128.go create mode 100644 vendor/gonum.org/v1/gonum/blas/cblas128/conv.go create mode 100644 vendor/gonum.org/v1/gonum/blas/cblas128/conv_hermitian.go create mode 100644 vendor/gonum.org/v1/gonum/blas/cblas128/conv_symmetric.go create mode 100644 vendor/gonum.org/v1/gonum/blas/cblas128/doc.go create mode 100644 vendor/gonum.org/v1/gonum/blas/conversions.bash create mode 100644 vendor/gonum.org/v1/gonum/blas/doc.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/dgemm.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/doc.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/errors.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/gonum.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx128.go create mode 100644 
vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx64.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level1float32.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level1float32_dsdot.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level1float32_sdot.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level1float32_sdsdot.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level1float64.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level1float64_ddot.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx128.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx64.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level2float32.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level2float64.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level3cmplx128.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level3cmplx64.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level3float32.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/level3float64.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/sgemm.go create mode 100644 vendor/gonum.org/v1/gonum/blas/gonum/single_precision.bash create mode 100644 vendor/gonum.org/v1/gonum/floats/README.md create mode 100644 vendor/gonum.org/v1/gonum/floats/doc.go create mode 100644 vendor/gonum.org/v1/gonum/floats/floats.go create mode 100644 vendor/gonum.org/v1/gonum/floats/scalar/doc.go create mode 100644 vendor/gonum.org/v1/gonum/floats/scalar/scalar.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/axpyinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/axpyincto_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/doc.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/dotcinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/dotcunitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/dotuinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/dotuunitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/dscalinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/dscalunitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/scal.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/scalUnitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/scalinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/stubs.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_amd64.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_noasm.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/axpyinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/axpyincto_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/axpyunitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/axpyunitaryto_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/conj.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/doc.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/dotcinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/dotcunitary_amd64.s create mode 100644 
vendor/gonum.org/v1/gonum/internal/asm/c64/dotuinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/dotuunitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/scal.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/stubs.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/stubs_amd64.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/c64/stubs_noasm.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/axpyinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/axpyincto_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitaryto_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/ddotinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/ddotunitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/doc.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/dotinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/dotunitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/ge_amd64.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/ge_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/ge_noasm.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/gemv.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/l2norm.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/scal.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_amd64.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_noasm.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f32/sum_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/abssum_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/abssuminc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/add_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/addconst_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/axpy.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/axpyinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/axpyincto_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitaryto_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/cumprod_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/cumsum_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/div_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/divto_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/doc.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/dot.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/dot_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/ge_amd64.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/ge_noasm.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/gemvN_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/gemvT_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/ger_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/l1norm_amd64.s create mode 100644 
vendor/gonum.org/v1/gonum/internal/asm/f64/l2norm_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/l2norm_noasm.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/l2normdist_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/l2norminc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/linfnorm_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/scal.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/scalinc_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/scalincto_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitary_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitaryto_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_amd64.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_noasm.go create mode 100644 vendor/gonum.org/v1/gonum/internal/asm/f64/sum_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/cmplx64/abs.go create mode 100644 vendor/gonum.org/v1/gonum/internal/cmplx64/conj.go create mode 100644 vendor/gonum.org/v1/gonum/internal/cmplx64/doc.go create mode 100644 vendor/gonum.org/v1/gonum/internal/cmplx64/isinf.go create mode 100644 vendor/gonum.org/v1/gonum/internal/cmplx64/isnan.go create mode 100644 vendor/gonum.org/v1/gonum/internal/cmplx64/sqrt.go create mode 100644 vendor/gonum.org/v1/gonum/internal/math32/doc.go create mode 100644 vendor/gonum.org/v1/gonum/internal/math32/math.go create mode 100644 vendor/gonum.org/v1/gonum/internal/math32/signbit.go create mode 100644 vendor/gonum.org/v1/gonum/internal/math32/sqrt.go create mode 100644 vendor/gonum.org/v1/gonum/internal/math32/sqrt_amd64.go create mode 100644 vendor/gonum.org/v1/gonum/internal/math32/sqrt_amd64.s create mode 100644 vendor/gonum.org/v1/gonum/internal/math32/sqrt_arm64.go create mode 100644 vendor/gonum.org/v1/gonum/internal/math32/sqrt_arm64.s create mode 100644 vendor/gonum.org/v1/gonum/lapack/.gitignore create mode 100644 vendor/gonum.org/v1/gonum/lapack/README.md create mode 100644 vendor/gonum.org/v1/gonum/lapack/doc.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dbdsqr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgebak.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgebal.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgebd2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgebrd.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgecon.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgeev.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgehd2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgehrd.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgelq2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgelqf.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgels.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgeql2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgeqp3.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgeqr2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgeqrf.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgerq2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgerqf.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgesc2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgesv.go create mode 100644 
vendor/gonum.org/v1/gonum/lapack/gonum/dgesvd.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgetc2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgetf2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgetrf.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgetri.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgetrs.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgghrd.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dggsvd3.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dggsvp3.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dgtsv.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dhseqr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlabrd.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlacn2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlacpy.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlae2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlaev2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlaexc.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlag2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlags2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlagtm.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlahqr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlahr2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlaln2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlangb.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlange.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlangt.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlanhs.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlansb.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlanst.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlansy.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlantb.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlantr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlanv2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlapll.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlapmr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlapmt.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlapy2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlaqp2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlaqps.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr04.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr1.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr23.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr5.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlarf.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlarfb.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlarfg.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlarft.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlarfx.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlartg.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlas2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlascl.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlaset.go create mode 100644 
vendor/gonum.org/v1/gonum/lapack/gonum/dlasq1.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlasq2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlasq3.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlasq4.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlasq5.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlasq6.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlasr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlasrt.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlassq.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlasv2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlaswp.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlasy2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlatbs.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlatdf.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlatrd.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlatrs.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlauu2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dlauum.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/doc.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorg2l.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorg2r.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorgbr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorghr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorgl2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorglq.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorgql.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorgqr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorgr2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorgtr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorm2r.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dormbr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dormhr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dorml2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dormlq.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dormqr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dormr2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpbcon.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpbtf2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpbtrf.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpbtrs.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpocon.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpotf2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpotrf.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpotri.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpotrs.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpstf2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpstrf.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dptcon.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dptsv.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpttrf.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dpttrs.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dptts2.go create mode 100644 
vendor/gonum.org/v1/gonum/lapack/gonum/drscl.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dsteqr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dsterf.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dsyev.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dsytd2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dsytrd.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dtbtrs.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dtgsja.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dtrcon.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dtrevc3.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dtrexc.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dtrti2.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dtrtri.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/dtrtrs.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/errors.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/iladlc.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/iladlr.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/ilaenv.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/iparmq.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/gonum/lapack.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/lapack.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/lapack64/doc.go create mode 100644 vendor/gonum.org/v1/gonum/lapack/lapack64/lapack64.go create mode 100644 vendor/gonum.org/v1/gonum/mat/README.md create mode 100644 vendor/gonum.org/v1/gonum/mat/band.go create mode 100644 vendor/gonum.org/v1/gonum/mat/cdense.go create mode 100644 vendor/gonum.org/v1/gonum/mat/cholesky.go create mode 100644 vendor/gonum.org/v1/gonum/mat/cmatrix.go create mode 100644 vendor/gonum.org/v1/gonum/mat/consts.go create mode 100644 vendor/gonum.org/v1/gonum/mat/dense.go create mode 100644 vendor/gonum.org/v1/gonum/mat/dense_arithmetic.go create mode 100644 vendor/gonum.org/v1/gonum/mat/diagonal.go create mode 100644 vendor/gonum.org/v1/gonum/mat/doc.go create mode 100644 vendor/gonum.org/v1/gonum/mat/eigen.go create mode 100644 vendor/gonum.org/v1/gonum/mat/errors.go create mode 100644 vendor/gonum.org/v1/gonum/mat/format.go create mode 100644 vendor/gonum.org/v1/gonum/mat/gsvd.go create mode 100644 vendor/gonum.org/v1/gonum/mat/hogsvd.go create mode 100644 vendor/gonum.org/v1/gonum/mat/index_bound_checks.go create mode 100644 vendor/gonum.org/v1/gonum/mat/index_no_bound_checks.go create mode 100644 vendor/gonum.org/v1/gonum/mat/inner.go create mode 100644 vendor/gonum.org/v1/gonum/mat/io.go create mode 100644 vendor/gonum.org/v1/gonum/mat/lq.go create mode 100644 vendor/gonum.org/v1/gonum/mat/lu.go create mode 100644 vendor/gonum.org/v1/gonum/mat/matrix.go create mode 100644 vendor/gonum.org/v1/gonum/mat/offset.go create mode 100644 vendor/gonum.org/v1/gonum/mat/offset_appengine.go create mode 100644 vendor/gonum.org/v1/gonum/mat/pool.go create mode 100644 vendor/gonum.org/v1/gonum/mat/product.go create mode 100644 vendor/gonum.org/v1/gonum/mat/qr.go create mode 100644 vendor/gonum.org/v1/gonum/mat/shadow.go create mode 100644 vendor/gonum.org/v1/gonum/mat/shadow_common.go create mode 100644 vendor/gonum.org/v1/gonum/mat/shadow_complex.go create mode 100644 vendor/gonum.org/v1/gonum/mat/solve.go create mode 100644 vendor/gonum.org/v1/gonum/mat/svd.go create mode 100644 vendor/gonum.org/v1/gonum/mat/symband.go create mode 100644 
vendor/gonum.org/v1/gonum/mat/symmetric.go create mode 100644 vendor/gonum.org/v1/gonum/mat/triangular.go create mode 100644 vendor/gonum.org/v1/gonum/mat/triband.go create mode 100644 vendor/gonum.org/v1/gonum/mat/tridiag.go create mode 100644 vendor/gonum.org/v1/gonum/mat/vector.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/README.md create mode 100644 vendor/gonum.org/v1/gonum/mathext/airy.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/beta.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/betainc.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/digamma.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/doc.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/ell_carlson.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/ell_complete.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/erf.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/gamma_inc.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/gamma_inc_inv.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/amos/amos.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/amos/doc.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/amos/staticcheck.conf create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/cephes.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/doc.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/igam.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/igami.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/incbeta.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/incbi.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/lanczos.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/ndtri.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/polevl.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/staticcheck.conf create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/unity.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/cephes/zeta.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/gonum/beta.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/gonum/doc.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/internal/gonum/gonum.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/mvgamma.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/roots.go create mode 100644 vendor/gonum.org/v1/gonum/mathext/zeta.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/README.md create mode 100644 vendor/gonum.org/v1/gonum/optimize/backtracking.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/bfgs.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/bisection.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/cg.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/cmaes.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/doc.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/errors.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/functionconvergence.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/gradientdescent.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/guessandcheck.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/interfaces.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/lbfgs.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/linesearch.go create mode 100644 
vendor/gonum.org/v1/gonum/optimize/listsearch.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/local.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/minimize.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/morethuente.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/neldermead.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/newton.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/printer.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/stepsizers.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/termination.go create mode 100644 vendor/gonum.org/v1/gonum/optimize/types.go create mode 100644 vendor/gonum.org/v1/gonum/spatial/r1/doc.go create mode 100644 vendor/gonum.org/v1/gonum/spatial/r1/interval.go create mode 100644 vendor/gonum.org/v1/gonum/stat/README.md create mode 100644 vendor/gonum.org/v1/gonum/stat/combin/combin.go create mode 100644 vendor/gonum.org/v1/gonum/stat/combin/doc.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distmv/dirichlet.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distmv/distmv.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distmv/doc.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distmv/interfaces.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distmv/normal.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distmv/statdist.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distmv/studentst.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distmv/uniform.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/alphastable.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/bernoulli.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/beta.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/binomial.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/categorical.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/chi.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/chisquared.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/constants.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/doc.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/exponential.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/f.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/gamma.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/general.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/gumbel.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/interfaces.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/inversegamma.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/laplace.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/logistic.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/lognormal.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/norm.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/pareto.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/poisson.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/statdist.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/studentst.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/triangle.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/uniform.go create mode 100644 vendor/gonum.org/v1/gonum/stat/distuv/weibull.go create mode 100644 vendor/gonum.org/v1/gonum/stat/doc.go create mode 100644 vendor/gonum.org/v1/gonum/stat/pca_cca.go create mode 100644 vendor/gonum.org/v1/gonum/stat/roc.go create mode 100644 
vendor/gonum.org/v1/gonum/stat/stat.go
 create mode 100644 vendor/gonum.org/v1/gonum/stat/statmat.go

diff --git a/go.mod b/go.mod
index 2c289bb96b..d669946d2d 100644
--- a/go.mod
+++ b/go.mod
@@ -28,6 +28,7 @@ require (
 	github.com/prometheus/client_golang v1.21.1
 	github.com/spf13/cobra v1.9.1
 	github.com/spf13/pflag v1.0.6
+	gonum.org/v1/gonum v0.16.0
 	gopkg.in/fsnotify.v1 v1.4.7
 	gopkg.in/ini.v1 v1.67.0
 	gopkg.in/yaml.v2 v2.4.0
diff --git a/go.sum b/go.sum
index ea705f8a4a..38a9b9d2be 100644
--- a/go.sum
+++ b/go.sum
@@ -824,6 +824,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
 gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
+gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
+gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
 google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
 google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
 google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
diff --git a/pkg/performanceprofile/profilecreator/autosize/autosize.go b/pkg/performanceprofile/profilecreator/autosize/autosize.go
new file mode 100644
index 0000000000..5be9800ec6
--- /dev/null
+++ b/pkg/performanceprofile/profilecreator/autosize/autosize.go
@@ -0,0 +1,246 @@
+package autosize
+
+import (
+	"errors"
+	"fmt"
+	"log"
+	"math"
+
+	"gonum.org/v1/gonum/optimize"
+
+	"github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/profilecreator"
+)
+
+// Assumptions:
+// 1. All the machines in the node pool have identical HW specs and need identical sizing.
+// 2. We cannot distinguish between infra/OS CPU requirements and control plane CPU requirements,
+//    so we conflate the two costs in the latter.
+//
+// Definitions:
+// x_c: CPUs for the control plane - includes x_i: CPUs for OS/Infra
+// x_w: CPUs for the workload
+// Tc:  Total available CPUs (includes OS/Infra)
+//
+// Hard Constraints:
+// x_c, x_w are integers because we need to dedicate full cores
+// x_c, x_w >= 0
+// x_c + x_w <= Tc
+// x_c >= req(x_w) // control plane and infra cost is a function of the expected workload
+//
+// Objective:
+// We want to maximize x_w, or, equivalently, minimize x_c.
+
+const (
+	defaultPenaltyWeight                 float64 = 100.0
+	defaultReservedRatioInitial          float64 = 0.0625 // 1/16, determined empirically. Use only as the initial value.
+	defaultReservedRatioMax              float64 = 0.25   // 1/4, determined empirically. This is the practical upper bound.
+	defaultControlPlaneWorkloadCoreRatio float64 = 0.075  // TODO: how much control plane/infra power do we need to support the workload?
+)
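Restated compactly, the model in the comment above is a small integer program. Writing c_0 for the DefaultControlPlaneCores() baseline and R for defaultControlPlaneWorkloadCoreRatio (both labels are shorthand for this note, not identifiers from the patch):

\[
\begin{aligned}
\max_{x_c,\, x_w \,\in\, \mathbb{Z}_{\ge 0}} \quad & x_w \\
\text{s.t.} \quad & x_c + x_w \le T_c \\
& x_c \ge \mathrm{req}(x_w) = c_0 + R\, x_w
\end{aligned}
\]

Since gonum/optimize supports neither integrality nor bound constraints, the code below relaxes x_c and x_w to reals, folds each constraint into a quadratic penalty (see objective), and restores integrality afterwards by rounding and snapping to the SMT level.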
+) + +var ( + ErrUnderallocatedControlPlane = errors.New("not enough CPUs for control plane") + ErrOverallocatedControlPlane = errors.New("too many CPUs for control plane") + ErrInconsistentAllocation = errors.New("inconsistent CPus allocation") +) + +type Env struct { + Log *log.Logger +} + +func DefaultEnv() Env { + return Env{ + Log: profilecreator.GetAlertSink(), + } +} + +type Params struct { + OfflinedCPUCount int + UserLevelNetworking bool + MachineData *profilecreator.GHWHandler + // cached vars + totalCPUs int + smtLevel int +} + +func (p Params) String() string { + return fmt.Sprintf("cpus=%d offline=%v SMTLevel=%v", p.totalCPUs, p.OfflinedCPUCount, p.smtLevel) +} + +func setupMachineData(p *Params) error { + var err error + + cpus, err := p.MachineData.CPU() + if err != nil { + return err + } + + p.totalCPUs = int(cpus.TotalHardwareThreads) + // NOTE: this assumes all cores are equal, but it's a limitation also shared by GHW. CPUs with P/E cores will be misrepresented. + p.smtLevel = int(cpus.TotalHardwareThreads / cpus.TotalCores) + return nil +} + +func (p Params) TotalCPUs() int { + return p.totalCPUs +} + +func (p Params) SMTLevel() int { + return p.smtLevel +} + +func (p Params) DefaultControlPlaneCores() int { + // intentionally overallocate to have a safe baseline + Tc := p.totalCPUs + return int(math.Round(float64(Tc) * defaultReservedRatioInitial)) // TODO handle SMT +} + +// Get x_c, x_w as initial hardcoded value. Subject to optimization +func (p Params) DefaultAllocation() Values { + Tc := p.totalCPUs + x_c := p.DefaultControlPlaneCores() + return Values{ + ReservedCPUCount: x_c, + IsolatedCPUCount: Tc - x_c, + } +} + +func (p Params) initialValue() []float64 { + vals := p.DefaultAllocation() + return []float64{ + float64(vals.ReservedCPUCount), // x_c + float64(vals.IsolatedCPUCount), // x_w + } +} + +func (p Params) controlPlaneRequirement(x_w float64) float64 { + R := defaultControlPlaneWorkloadCoreRatio + if p.UserLevelNetworking { + R = 0.0 + } + // TODO: the most obvious relationship is for kernel level networking. + // We start with a linear relationship because its simplicity. + return float64(p.DefaultControlPlaneCores()) + R*x_w +} + +type Score struct { + Cost float64 // the lower the better +} + +func (sc Score) String() string { + val := -sc.Cost // positive values are easier to grasp + return fmt.Sprintf("optimization result: %.3f (higher is better)", val) +} + +type Values struct { + // we intentionally compute the recommended cpu count, not precise allocation, because + // this is better done by other packages. We may expose the precise allocation as hint + // or for reference purposes in the future + ReservedCPUCount int + IsolatedCPUCount int +} + +func (vals Values) String() string { + return fmt.Sprintf("reserved=%v/isolated=%v", vals.ReservedCPUCount, vals.IsolatedCPUCount) +} + +// gonum doesn't support bounds yet so we have to make this an explicit step +// https://github.com/gonum/gonum/issues/1725 +func Validate(params Params, vals Values) error { + Tc := params.TotalCPUs() + if vals.ReservedCPUCount < 1 { // TODO handle SMT + return ErrUnderallocatedControlPlane + } + if vals.ReservedCPUCount > int(math.Round((float64(Tc) * defaultReservedRatioMax))) { // works, but likely unacceptable + return ErrOverallocatedControlPlane + } + if Tc != vals.ReservedCPUCount+vals.IsolatedCPUCount { + return ErrInconsistentAllocation + } + return nil +} + +// Objective function to minimize. 
+
+// Objective function to minimize.
+// x[0] is x_c
+// x[1] is x_w
+func objective(p Params, x []float64) float64 {
+	x_c := x[0]
+	x_w := x[1]
+
+	// Our original objective is to maximize x_w, so we minimize -x_w.
+	target := -x_w
+
+	// gonum doesn't support bounds yet, so we have to use penalties:
+	// https://github.com/gonum/gonum/issues/1725
+
+	// Hard constraints
+	var hardPenalty float64
+	// Don't exceed the total CPUs.
+	hardPenalty += defaultPenaltyWeight * math.Pow(math.Max(0, x_c+x_w-float64(p.TotalCPUs())), 2)
+
+	// Meet the control plane/infra requirement so the workload does not starve the control plane.
+	hardPenalty += defaultPenaltyWeight * math.Pow(math.Max(0, p.controlPlaneRequirement(x_w)-x_c), 2)
+
+	// Must use positive CPU values (since gonum/optimize doesn't have simple bounds for all solvers).
+	hardPenalty += defaultPenaltyWeight * (math.Pow(math.Max(0, -x_c), 2) + math.Pow(math.Max(0, -x_w), 2))
+
+	// Allocate in multiples of the SMT level (usually 2) -- TODO: should this be soft?
+	hardPenalty += defaultPenaltyWeight * math.Pow(float64(int(math.Round(x_c))%p.SMTLevel()), 2)
+
+	return target + hardPenalty
+}
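+
+// Illustrative walkthrough (hypothetical numbers, assuming Tc=64, SMT level 2
+// and kernel-level networking, so R=0.075 and DefaultControlPlaneCores() = 4):
+// the binding constraint is x_c >= 4 + 0.075*x_w; substituting x_w = 64 - x_c
+// gives x_c >= 8.8/1.075 ~= 8.19, so the minimizer should settle near
+// x = [8.19, 55.81]. Sample evaluations:
+//
+//	objective(p, []float64{8, 56})  // -56 + 100*(8.2-8)^2 = -52
+//	objective(p, []float64{9, 55})  // -55 + 100*(9%2)^2   =  45 (odd x_c is penalized)
+//	objective(p, []float64{10, 54}) // -54 (all constraints satisfied)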
Score: %v %s totalCPUs=%d", score.String(), vals.String(), totCPUs) + return vals, score, nil +} + +func asMultipleOf(v, x int) int { + r := v % x + if r == 0 { + return v + } + return v + r +} diff --git a/pkg/performanceprofile/profilecreator/cmd/root.go b/pkg/performanceprofile/profilecreator/cmd/root.go index 037bfad7b8..7b70f291ae 100644 --- a/pkg/performanceprofile/profilecreator/cmd/root.go +++ b/pkg/performanceprofile/profilecreator/cmd/root.go @@ -37,6 +37,7 @@ import ( machineconfigv1 "github.com/openshift/api/machineconfiguration/v1" performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2" "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/profilecreator" + "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/profilecreator/autosize" "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/profilecreator/cmd/hypershift" "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/profilecreator/serialize" "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/profilecreator/toleration" @@ -116,10 +117,10 @@ func NewRootCommand() *cobra.Command { pcArgs := &ProfileCreatorArgs{ UserLevelNetworking: ptr.To(false), PerPodPowerManagement: ptr.To(false), + Autosize: ptr.To(false), } var requiredFlags = []string{ - "reserved-cpu-count", "rt-kernel", "must-gather-dir-path", } @@ -164,10 +165,26 @@ func NewRootCommand() *cobra.Command { if err != nil { return fmt.Errorf("targeted nodes differ: %w", err) } + + sizing := autosize.Values{ + ReservedCPUCount: pcArgs.ReservedCPUCount, + } + if isAutosizeEnabled(pcArgs) { + params := autosize.Params{ + OfflinedCPUCount: pcArgs.OfflinedCPUCount, + UserLevelNetworking: (pcArgs.UserLevelNetworking != nil && *pcArgs.UserLevelNetworking), + MachineData: nodesHandlers[0], // assume all nodes equal, pick the easiest + } + sizing, _, err = autosize.Compute(autosize.DefaultEnv(), params) + if err != nil { + return fmt.Errorf("failed to autosize the cluster values: %v", err) + } + } + // We make sure that the matched Nodes are the same // Assumption here is moving forward matchedNodes[0] is representative of how all the nodes are // same from hardware topology point of view - profileData, err := makeProfileDataFrom(nodesHandlers[0], pcArgs) + profileData, err := makeProfileDataFrom(nodesHandlers[0], pcArgs, sizing) if err != nil { return fmt.Errorf("failed to make profile data from node handler: %w", err) } @@ -222,6 +239,9 @@ func validateProfileCreatorFlags(pcArgs *ProfileCreatorArgs) error { if pcArgs.MCPName != "" && pcArgs.NodePoolName != "" { return fmt.Errorf("--mcp-name and --node-pool-name options cannot be used together") } + if !isAutosizeEnabled(pcArgs) && pcArgs.ReservedCPUCount == 0 { + return fmt.Errorf("--reserved-cpu-count need to be set and greater than zero if autosizing (--autosize) is disabled") + } if pcArgs.NodePoolName == "" { // NodePoolName is an alias of MCPName pcArgs.NodePoolName = pcArgs.MCPName @@ -303,12 +323,13 @@ func makeClusterData(mustGatherDirPath string, createForHypershift bool) (Cluste return clusterData, nil } -func makeProfileDataFrom(nodeHandler *profilecreator.GHWHandler, args *ProfileCreatorArgs) (*ProfileData, error) { +func makeProfileDataFrom(nodeHandler *profilecreator.GHWHandler, args *ProfileCreatorArgs, sizing autosize.Values) (*ProfileData, error) { systemInfo, err := nodeHandler.GatherSystemInfo() if err != nil { return nil, fmt.Errorf("failed to compute get system 
information: %v", err) } - reservedCPUs, isolatedCPUs, offlinedCPUs, err := profilecreator.CalculateCPUSets(systemInfo, args.ReservedCPUCount, args.OfflinedCPUCount, args.SplitReservedCPUsAcrossNUMA, args.DisableHT, args.PowerConsumptionMode == ultraLowLatency) + + reservedCPUs, isolatedCPUs, offlinedCPUs, err := profilecreator.CalculateCPUSets(systemInfo, sizing.ReservedCPUCount, args.OfflinedCPUCount, args.SplitReservedCPUsAcrossNUMA, args.DisableHT, args.PowerConsumptionMode == ultraLowLatency) if err != nil { return nil, fmt.Errorf("failed to compute the reserved and isolated CPUs: %v", err) } @@ -411,13 +432,14 @@ type ProfileCreatorArgs struct { TMPolicy string `json:"topology-manager-policy"` PerPodPowerManagement *bool `json:"per-pod-power-management,omitempty"` EnableHardwareTuning bool `json:"enable-hardware-tuning,omitempty"` + Autosize *bool `json:"autosize,omitempty"` // internal only this argument not passed by the user // but detected automatically createForHypershift bool } func (pca *ProfileCreatorArgs) AddFlags(flags *pflag.FlagSet) { - flags.IntVar(&pca.ReservedCPUCount, "reserved-cpu-count", 0, "Number of reserved CPUs (required)") + flags.IntVar(&pca.ReservedCPUCount, "reserved-cpu-count", 0, "Number of reserved CPUs") flags.IntVar(&pca.OfflinedCPUCount, "offlined-cpu-count", 0, "Number of offlined CPUs") flags.BoolVar(&pca.SplitReservedCPUsAcrossNUMA, "split-reserved-cpus-across-numa", false, "Split the Reserved CPUs across NUMA nodes") flags.StringVar(&pca.MCPName, "mcp-name", "", "MCP name corresponding to the target machines (required)") @@ -431,6 +453,7 @@ func (pca *ProfileCreatorArgs) AddFlags(flags *pflag.FlagSet) { flags.BoolVar(pca.PerPodPowerManagement, "per-pod-power-management", false, "Enable Per Pod Power Management") flags.BoolVar(&pca.EnableHardwareTuning, "enable-hardware-tuning", false, "Enable setting maximum cpu frequencies") flags.StringVar(&pca.NodePoolName, "node-pool-name", "", "Node pool name corresponding to the target machines (HyperShift only)") + flags.BoolVar(pca.Autosize, "autosize", false, "autosize the control plane") } func makePerformanceProfileFrom(profileData ProfileData) (runtime.Object, error) { @@ -582,3 +605,7 @@ func setSelectorsFor(profileData *ProfileData, args *ProfileCreatorArgs) error { profileData.mcpSelector = mcpSelector return nil } + +func isAutosizeEnabled(pcArgs *ProfileCreatorArgs) bool { + return pcArgs.Autosize != nil && *pcArgs.Autosize +} diff --git a/vendor/golang.org/x/tools/container/intsets/sparse.go b/vendor/golang.org/x/tools/container/intsets/sparse.go new file mode 100644 index 0000000000..c56aacc28b --- /dev/null +++ b/vendor/golang.org/x/tools/container/intsets/sparse.go @@ -0,0 +1,1107 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package intsets provides Sparse, a compact and fast representation +// for sparse sets of int values. +// +// The time complexity of the operations Len, Insert, Remove and Has +// is in O(n) but in practice those methods are faster and more +// space-efficient than equivalent operations on sets based on the Go +// map type. The IsEmpty, Min, Max, Clear and TakeMin operations +// require constant time. +package intsets // import "golang.org/x/tools/container/intsets" + +// TODO(adonovan): +// - Add InsertAll(...int), RemoveAll(...int) +// - Add 'bool changed' results for {Intersection,Difference}With too. 
+// +// TODO(adonovan): implement Dense, a dense bit vector with a similar API. +// The space usage would be proportional to Max(), not Len(), and the +// implementation would be based upon big.Int. +// +// TODO(adonovan): opt: make UnionWith and Difference faster. +// These are the hot-spots for go/pointer. + +import ( + "bytes" + "fmt" + "math/bits" +) + +// A Sparse is a set of int values. +// Sparse operations (even queries) are not concurrency-safe. +// +// The zero value for Sparse is a valid empty set. +// +// Sparse sets must be copied using the Copy method, not by assigning +// a Sparse value. +type Sparse struct { + // An uninitialized Sparse represents an empty set. + // An empty set may also be represented by + // root.next == root.prev == &root. + // + // The root is always the block with the smallest offset. + // It can be empty, but only if it is the only block; in that case, offset is + // MaxInt (which is not a valid offset). + root block +} + +type word uintptr + +const ( + _m = ^word(0) + bitsPerWord = 8 << (_m>>8&1 + _m>>16&1 + _m>>32&1) + bitsPerBlock = 256 // optimal value for go/pointer solver performance + wordsPerBlock = bitsPerBlock / bitsPerWord +) + +// Limit values of implementation-specific int type. +const ( + MaxInt = int(^uint(0) >> 1) + MinInt = -MaxInt - 1 +) + +// popcount returns the number of set bits in w. +func popcount(x word) int { + // Avoid OnesCount(uint): don't assume uint = uintptr. + if bitsPerWord == 32 { + return bits.OnesCount32(uint32(x)) + } else { + return bits.OnesCount64(uint64(x)) + } +} + +// nlz returns the number of leading zeros of x. +func nlz(x word) int { + // Avoid LeadingZeros(uint): don't assume uint = uintptr. + if bitsPerWord == 32 { + return bits.LeadingZeros32(uint32(x)) + } else { + return bits.LeadingZeros64(uint64(x)) + } +} + +// ntz returns the number of trailing zeros of x. +func ntz(x word) int { + // Avoid TrailingZeros(uint): don't assume uint = uintptr. + if bitsPerWord == 32 { + return bits.TrailingZeros32(uint32(x)) + } else { + return bits.TrailingZeros64(uint64(x)) + } +} + +// -- block ------------------------------------------------------------ + +// A set is represented as a circular doubly-linked list of blocks, +// each containing an offset and a bit array of fixed size +// bitsPerBlock; the blocks are ordered by increasing offset. +// +// The set contains an element x iff the block whose offset is x - (x +// mod bitsPerBlock) has the bit (x mod bitsPerBlock) set, where mod +// is the Euclidean remainder. +// +// A block may only be empty transiently. +type block struct { + offset int // offset mod bitsPerBlock == 0 + bits [wordsPerBlock]word // contains at least one set bit + next, prev *block // doubly-linked list of blocks +} + +// wordMask returns the word index (in block.bits) +// and single-bit mask for the block's ith bit. +func wordMask(i uint) (w uint, mask word) { + w = i / bitsPerWord + mask = 1 << (i % bitsPerWord) + return +} + +// insert sets the block b's ith bit and +// returns true if it was not already set. +func (b *block) insert(i uint) bool { + w, mask := wordMask(i) + if b.bits[w]&mask == 0 { + b.bits[w] |= mask + return true + } + return false +} + +// remove clears the block's ith bit and +// returns true if the bit was previously set. +// NB: may leave the block empty. +func (b *block) remove(i uint) bool { + w, mask := wordMask(i) + if b.bits[w]&mask != 0 { + b.bits[w] &^= mask + return true + } + return false +} + +// has reports whether the block's ith bit is set. 
+func (b *block) has(i uint) bool { + w, mask := wordMask(i) + return b.bits[w]&mask != 0 +} + +// empty reports whether b.len()==0, but more efficiently. +func (b *block) empty() bool { + for _, w := range b.bits { + if w != 0 { + return false + } + } + return true +} + +// len returns the number of set bits in block b. +func (b *block) len() int { + var l int + for _, w := range b.bits { + l += popcount(w) + } + return l +} + +// max returns the maximum element of the block. +// The block must not be empty. +func (b *block) max() int { + bi := b.offset + bitsPerBlock + // Decrement bi by number of high zeros in last.bits. + for i := len(b.bits) - 1; i >= 0; i-- { + if w := b.bits[i]; w != 0 { + return bi - nlz(w) - 1 + } + bi -= bitsPerWord + } + panic("BUG: empty block") +} + +// min returns the minimum element of the block, +// and also removes it if take is set. +// The block must not be initially empty. +// NB: may leave the block empty. +func (b *block) min(take bool) int { + for i, w := range b.bits { + if w != 0 { + tz := ntz(w) + if take { + b.bits[i] = w &^ (1 << uint(tz)) + } + return b.offset + i*bitsPerWord + tz + } + } + panic("BUG: empty block") +} + +// lowerBound returns the smallest element of the block that is greater than or +// equal to the element corresponding to the ith bit. If there is no such +// element, the second return value is false. +func (b *block) lowerBound(i uint) (int, bool) { + w := i / bitsPerWord + bit := i % bitsPerWord + + if val := b.bits[w] >> bit; val != 0 { + return b.offset + int(i) + ntz(val), true + } + + for w++; w < wordsPerBlock; w++ { + if val := b.bits[w]; val != 0 { + return b.offset + int(w*bitsPerWord) + ntz(val), true + } + } + + return 0, false +} + +// forEach calls f for each element of block b. +// f must not mutate b's enclosing Sparse. +func (b *block) forEach(f func(int)) { + for i, w := range b.bits { + offset := b.offset + i*bitsPerWord + for bi := 0; w != 0 && bi < bitsPerWord; bi++ { + if w&1 != 0 { + f(offset) + } + offset++ + w >>= 1 + } + } +} + +// offsetAndBitIndex returns the offset of the block that would +// contain x and the bit index of x within that block. +func offsetAndBitIndex(x int) (int, uint) { + mod := x % bitsPerBlock + if mod < 0 { + // Euclidean (non-negative) remainder + mod += bitsPerBlock + } + return x - mod, uint(mod) +} + +// -- Sparse -------------------------------------------------------------- + +// none is a shared, empty, sentinel block that indicates the end of a block +// list. +var none block + +// Dummy type used to generate an implicit panic. This must be defined at the +// package level; if it is defined inside a function, it prevents the inlining +// of that function. +type to_copy_a_sparse_you_must_call_its_Copy_method struct{} + +// init ensures s is properly initialized. +func (s *Sparse) init() { + root := &s.root + if root.next == nil { + root.offset = MaxInt + root.next = root + root.prev = root + } else if root.next.prev != root { + // Copying a Sparse x leads to pernicious corruption: the + // new Sparse y shares the old linked list, but iteration + // on y will never encounter &y.root so it goes into a + // loop. Fail fast before this occurs. + // We don't want to call panic here because it prevents the + // inlining of this function. 
+ _ = (interface{}(nil)).(to_copy_a_sparse_you_must_call_its_Copy_method) + } +} + +func (s *Sparse) first() *block { + s.init() + if s.root.offset == MaxInt { + return &none + } + return &s.root +} + +// next returns the next block in the list, or end if b is the last block. +func (s *Sparse) next(b *block) *block { + if b.next == &s.root { + return &none + } + return b.next +} + +// IsEmpty reports whether the set s is empty. +func (s *Sparse) IsEmpty() bool { + return s.root.next == nil || s.root.offset == MaxInt +} + +// Len returns the number of elements in the set s. +func (s *Sparse) Len() int { + var l int + for b := s.first(); b != &none; b = s.next(b) { + l += b.len() + } + return l +} + +// Max returns the maximum element of the set s, or MinInt if s is empty. +func (s *Sparse) Max() int { + if s.IsEmpty() { + return MinInt + } + return s.root.prev.max() +} + +// Min returns the minimum element of the set s, or MaxInt if s is empty. +func (s *Sparse) Min() int { + if s.IsEmpty() { + return MaxInt + } + return s.root.min(false) +} + +// LowerBound returns the smallest element >= x, or MaxInt if there is no such +// element. +func (s *Sparse) LowerBound(x int) int { + offset, i := offsetAndBitIndex(x) + for b := s.first(); b != &none; b = s.next(b) { + if b.offset > offset { + return b.min(false) + } + if b.offset == offset { + if y, ok := b.lowerBound(i); ok { + return y + } + } + } + return MaxInt +} + +// block returns the block that would contain offset, +// or nil if s contains no such block. +// Precondition: offset is a multiple of bitsPerBlock. +func (s *Sparse) block(offset int) *block { + for b := s.first(); b != &none && b.offset <= offset; b = s.next(b) { + if b.offset == offset { + return b + } + } + return nil +} + +// Insert adds x to the set s, and reports whether the set grew. +func (s *Sparse) Insert(x int) bool { + offset, i := offsetAndBitIndex(x) + + b := s.first() + for ; b != &none && b.offset <= offset; b = s.next(b) { + if b.offset == offset { + return b.insert(i) + } + } + + // Insert new block before b. + new := s.insertBlockBefore(b) + new.offset = offset + return new.insert(i) +} + +// removeBlock removes a block and returns the block that followed it (or end if +// it was the last block). +func (s *Sparse) removeBlock(b *block) *block { + if b != &s.root { + b.prev.next = b.next + b.next.prev = b.prev + if b.next == &s.root { + return &none + } + return b.next + } + + first := s.root.next + if first == &s.root { + // This was the only block. + s.Clear() + return &none + } + s.root.offset = first.offset + s.root.bits = first.bits + if first.next == &s.root { + // Single block remaining. + s.root.next = &s.root + s.root.prev = &s.root + } else { + s.root.next = first.next + first.next.prev = &s.root + } + return &s.root +} + +// Remove removes x from the set s, and reports whether the set shrank. +func (s *Sparse) Remove(x int) bool { + offset, i := offsetAndBitIndex(x) + if b := s.block(offset); b != nil { + if !b.remove(i) { + return false + } + if b.empty() { + s.removeBlock(b) + } + return true + } + return false +} + +// Clear removes all elements from the set s. +func (s *Sparse) Clear() { + s.root = block{ + offset: MaxInt, + next: &s.root, + prev: &s.root, + } +} + +// If set s is non-empty, TakeMin sets *p to the minimum element of +// the set s, removes that element from the set and returns true. +// Otherwise, it returns false and *p is undefined. 
+// +// This method may be used for iteration over a worklist like so: +// +// var x int +// for worklist.TakeMin(&x) { use(x) } +func (s *Sparse) TakeMin(p *int) bool { + if s.IsEmpty() { + return false + } + *p = s.root.min(true) + if s.root.empty() { + s.removeBlock(&s.root) + } + return true +} + +// Has reports whether x is an element of the set s. +func (s *Sparse) Has(x int) bool { + offset, i := offsetAndBitIndex(x) + if b := s.block(offset); b != nil { + return b.has(i) + } + return false +} + +// forEach applies function f to each element of the set s in order. +// +// f must not mutate s. Consequently, forEach is not safe to expose +// to clients. In any case, using "range s.AppendTo()" allows more +// natural control flow with continue/break/return. +func (s *Sparse) forEach(f func(int)) { + for b := s.first(); b != &none; b = s.next(b) { + b.forEach(f) + } +} + +// Copy sets s to the value of x. +func (s *Sparse) Copy(x *Sparse) { + if s == x { + return + } + + xb := x.first() + sb := s.first() + for xb != &none { + if sb == &none { + sb = s.insertBlockBefore(sb) + } + sb.offset = xb.offset + sb.bits = xb.bits + xb = x.next(xb) + sb = s.next(sb) + } + s.discardTail(sb) +} + +// insertBlockBefore returns a new block, inserting it before next. +// If next is the root, the root is replaced. If next is end, the block is +// inserted at the end. +func (s *Sparse) insertBlockBefore(next *block) *block { + if s.IsEmpty() { + if next != &none { + panic("BUG: passed block with empty set") + } + return &s.root + } + + if next == &s.root { + // Special case: we need to create a new block that will become the root + // block.The old root block becomes the second block. + second := s.root + s.root = block{ + next: &second, + } + if second.next == &s.root { + s.root.prev = &second + } else { + s.root.prev = second.prev + second.next.prev = &second + second.prev = &s.root + } + return &s.root + } + if next == &none { + // Insert before root. + next = &s.root + } + b := new(block) + b.next = next + b.prev = next.prev + b.prev.next = b + next.prev = b + return b +} + +// discardTail removes block b and all its successors from s. +func (s *Sparse) discardTail(b *block) { + if b != &none { + if b == &s.root { + s.Clear() + } else { + b.prev.next = &s.root + s.root.prev = b.prev + } + } +} + +// IntersectionWith sets s to the intersection s ∩ x. +func (s *Sparse) IntersectionWith(x *Sparse) { + if s == x { + return + } + + xb := x.first() + sb := s.first() + for xb != &none && sb != &none { + switch { + case xb.offset < sb.offset: + xb = x.next(xb) + + case xb.offset > sb.offset: + sb = s.removeBlock(sb) + + default: + var sum word + for i := range sb.bits { + r := xb.bits[i] & sb.bits[i] + sb.bits[i] = r + sum |= r + } + if sum != 0 { + sb = s.next(sb) + } else { + // sb will be overwritten or removed + } + + xb = x.next(xb) + } + } + + s.discardTail(sb) +} + +// Intersection sets s to the intersection x ∩ y. 
+func (s *Sparse) Intersection(x, y *Sparse) { + switch { + case s == x: + s.IntersectionWith(y) + return + case s == y: + s.IntersectionWith(x) + return + case x == y: + s.Copy(x) + return + } + + xb := x.first() + yb := y.first() + sb := s.first() + for xb != &none && yb != &none { + switch { + case xb.offset < yb.offset: + xb = x.next(xb) + continue + case xb.offset > yb.offset: + yb = y.next(yb) + continue + } + + if sb == &none { + sb = s.insertBlockBefore(sb) + } + sb.offset = xb.offset + + var sum word + for i := range sb.bits { + r := xb.bits[i] & yb.bits[i] + sb.bits[i] = r + sum |= r + } + if sum != 0 { + sb = s.next(sb) + } else { + // sb will be overwritten or removed + } + + xb = x.next(xb) + yb = y.next(yb) + } + + s.discardTail(sb) +} + +// Intersects reports whether s ∩ x ≠ ∅. +func (s *Sparse) Intersects(x *Sparse) bool { + sb := s.first() + xb := x.first() + for sb != &none && xb != &none { + switch { + case xb.offset < sb.offset: + xb = x.next(xb) + case xb.offset > sb.offset: + sb = s.next(sb) + default: + for i := range sb.bits { + if sb.bits[i]&xb.bits[i] != 0 { + return true + } + } + sb = s.next(sb) + xb = x.next(xb) + } + } + return false +} + +// UnionWith sets s to the union s ∪ x, and reports whether s grew. +func (s *Sparse) UnionWith(x *Sparse) bool { + if s == x { + return false + } + + var changed bool + xb := x.first() + sb := s.first() + for xb != &none { + if sb != &none && sb.offset == xb.offset { + for i := range xb.bits { + union := sb.bits[i] | xb.bits[i] + if sb.bits[i] != union { + sb.bits[i] = union + changed = true + } + } + xb = x.next(xb) + } else if sb == &none || sb.offset > xb.offset { + sb = s.insertBlockBefore(sb) + sb.offset = xb.offset + sb.bits = xb.bits + changed = true + + xb = x.next(xb) + } + sb = s.next(sb) + } + return changed +} + +// Union sets s to the union x ∪ y. +func (s *Sparse) Union(x, y *Sparse) { + switch { + case x == y: + s.Copy(x) + return + case s == x: + s.UnionWith(y) + return + case s == y: + s.UnionWith(x) + return + } + + xb := x.first() + yb := y.first() + sb := s.first() + for xb != &none || yb != &none { + if sb == &none { + sb = s.insertBlockBefore(sb) + } + switch { + case yb == &none || (xb != &none && xb.offset < yb.offset): + sb.offset = xb.offset + sb.bits = xb.bits + xb = x.next(xb) + + case xb == &none || (yb != &none && yb.offset < xb.offset): + sb.offset = yb.offset + sb.bits = yb.bits + yb = y.next(yb) + + default: + sb.offset = xb.offset + for i := range xb.bits { + sb.bits[i] = xb.bits[i] | yb.bits[i] + } + xb = x.next(xb) + yb = y.next(yb) + } + sb = s.next(sb) + } + + s.discardTail(sb) +} + +// DifferenceWith sets s to the difference s ∖ x. +func (s *Sparse) DifferenceWith(x *Sparse) { + if s == x { + s.Clear() + return + } + + xb := x.first() + sb := s.first() + for xb != &none && sb != &none { + switch { + case xb.offset > sb.offset: + sb = s.next(sb) + + case xb.offset < sb.offset: + xb = x.next(xb) + + default: + var sum word + for i := range sb.bits { + r := sb.bits[i] & ^xb.bits[i] + sb.bits[i] = r + sum |= r + } + if sum == 0 { + sb = s.removeBlock(sb) + } else { + sb = s.next(sb) + } + xb = x.next(xb) + } + } +} + +// Difference sets s to the difference x ∖ y. 
+func (s *Sparse) Difference(x, y *Sparse) { + switch { + case x == y: + s.Clear() + return + case s == x: + s.DifferenceWith(y) + return + case s == y: + var y2 Sparse + y2.Copy(y) + s.Difference(x, &y2) + return + } + + xb := x.first() + yb := y.first() + sb := s.first() + for xb != &none && yb != &none { + if xb.offset > yb.offset { + // y has block, x has &none + yb = y.next(yb) + continue + } + + if sb == &none { + sb = s.insertBlockBefore(sb) + } + sb.offset = xb.offset + + switch { + case xb.offset < yb.offset: + // x has block, y has &none + sb.bits = xb.bits + + sb = s.next(sb) + + default: + // x and y have corresponding blocks + var sum word + for i := range sb.bits { + r := xb.bits[i] & ^yb.bits[i] + sb.bits[i] = r + sum |= r + } + if sum != 0 { + sb = s.next(sb) + } else { + // sb will be overwritten or removed + } + + yb = y.next(yb) + } + xb = x.next(xb) + } + + for xb != &none { + if sb == &none { + sb = s.insertBlockBefore(sb) + } + sb.offset = xb.offset + sb.bits = xb.bits + sb = s.next(sb) + + xb = x.next(xb) + } + + s.discardTail(sb) +} + +// SymmetricDifferenceWith sets s to the symmetric difference s ∆ x. +func (s *Sparse) SymmetricDifferenceWith(x *Sparse) { + if s == x { + s.Clear() + return + } + + sb := s.first() + xb := x.first() + for xb != &none && sb != &none { + switch { + case sb.offset < xb.offset: + sb = s.next(sb) + case xb.offset < sb.offset: + nb := s.insertBlockBefore(sb) + nb.offset = xb.offset + nb.bits = xb.bits + xb = x.next(xb) + default: + var sum word + for i := range sb.bits { + r := sb.bits[i] ^ xb.bits[i] + sb.bits[i] = r + sum |= r + } + if sum == 0 { + sb = s.removeBlock(sb) + } else { + sb = s.next(sb) + } + xb = x.next(xb) + } + } + + for xb != &none { // append the tail of x to s + sb = s.insertBlockBefore(sb) + sb.offset = xb.offset + sb.bits = xb.bits + sb = s.next(sb) + xb = x.next(xb) + } +} + +// SymmetricDifference sets s to the symmetric difference x ∆ y. +func (s *Sparse) SymmetricDifference(x, y *Sparse) { + switch { + case x == y: + s.Clear() + return + case s == x: + s.SymmetricDifferenceWith(y) + return + case s == y: + s.SymmetricDifferenceWith(x) + return + } + + sb := s.first() + xb := x.first() + yb := y.first() + for xb != &none && yb != &none { + if sb == &none { + sb = s.insertBlockBefore(sb) + } + switch { + case yb.offset < xb.offset: + sb.offset = yb.offset + sb.bits = yb.bits + sb = s.next(sb) + yb = y.next(yb) + case xb.offset < yb.offset: + sb.offset = xb.offset + sb.bits = xb.bits + sb = s.next(sb) + xb = x.next(xb) + default: + var sum word + for i := range sb.bits { + r := xb.bits[i] ^ yb.bits[i] + sb.bits[i] = r + sum |= r + } + if sum != 0 { + sb.offset = xb.offset + sb = s.next(sb) + } + xb = x.next(xb) + yb = y.next(yb) + } + } + + for xb != &none { // append the tail of x to s + if sb == &none { + sb = s.insertBlockBefore(sb) + } + sb.offset = xb.offset + sb.bits = xb.bits + sb = s.next(sb) + xb = x.next(xb) + } + + for yb != &none { // append the tail of y to s + if sb == &none { + sb = s.insertBlockBefore(sb) + } + sb.offset = yb.offset + sb.bits = yb.bits + sb = s.next(sb) + yb = y.next(yb) + } + + s.discardTail(sb) +} + +// SubsetOf reports whether s ∖ x = ∅. 
+func (s *Sparse) SubsetOf(x *Sparse) bool { + if s == x { + return true + } + + sb := s.first() + xb := x.first() + for sb != &none { + switch { + case xb == &none || xb.offset > sb.offset: + return false + case xb.offset < sb.offset: + xb = x.next(xb) + default: + for i := range sb.bits { + if sb.bits[i]&^xb.bits[i] != 0 { + return false + } + } + sb = s.next(sb) + xb = x.next(xb) + } + } + return true +} + +// Equals reports whether the sets s and t have the same elements. +func (s *Sparse) Equals(t *Sparse) bool { + if s == t { + return true + } + sb := s.first() + tb := t.first() + for { + switch { + case sb == &none && tb == &none: + return true + case sb == &none || tb == &none: + return false + case sb.offset != tb.offset: + return false + case sb.bits != tb.bits: + return false + } + + sb = s.next(sb) + tb = t.next(tb) + } +} + +// String returns a human-readable description of the set s. +func (s *Sparse) String() string { + var buf bytes.Buffer + buf.WriteByte('{') + s.forEach(func(x int) { + if buf.Len() > 1 { + buf.WriteByte(' ') + } + fmt.Fprintf(&buf, "%d", x) + }) + buf.WriteByte('}') + return buf.String() +} + +// BitString returns the set as a string of 1s and 0s denoting the sum +// of the i'th powers of 2, for each i in s. A radix point, always +// preceded by a digit, appears if the sum is non-integral. +// +// Examples: +// +// {}.BitString() = "0" +// {4,5}.BitString() = "110000" +// {-3}.BitString() = "0.001" +// {-3,0,4,5}.BitString() = "110001.001" +func (s *Sparse) BitString() string { + if s.IsEmpty() { + return "0" + } + + min, max := s.Min(), s.Max() + var nbytes int + if max > 0 { + nbytes = max + } + nbytes++ // zero bit + radix := nbytes + if min < 0 { + nbytes += len(".") - min + } + + b := make([]byte, nbytes) + for i := range b { + b[i] = '0' + } + if radix < nbytes { + b[radix] = '.' + } + s.forEach(func(x int) { + if x >= 0 { + x += len(".") + } + b[radix-x] = '1' + }) + return string(b) +} + +// GoString returns a string showing the internal representation of +// the set s. +func (s *Sparse) GoString() string { + var buf bytes.Buffer + for b := s.first(); b != &none; b = s.next(b) { + fmt.Fprintf(&buf, "block %p {offset=%d next=%p prev=%p", + b, b.offset, b.next, b.prev) + for _, w := range b.bits { + fmt.Fprintf(&buf, " 0%016x", w) + } + fmt.Fprintf(&buf, "}\n") + } + return buf.String() +} + +// AppendTo returns the result of appending the elements of s to slice +// in order. +func (s *Sparse) AppendTo(slice []int) []int { + s.forEach(func(x int) { + slice = append(slice, x) + }) + return slice +} + +// -- Testing/debugging ------------------------------------------------ + +// check returns an error if the representation invariants of s are violated. +// (unused; retained for debugging) +func (s *Sparse) check() error { + s.init() + if s.root.empty() { + // An empty set must have only the root block with offset MaxInt. 
+ if s.root.next != &s.root { + return fmt.Errorf("multiple blocks with empty root block") + } + if s.root.offset != MaxInt { + return fmt.Errorf("empty set has offset %d, should be MaxInt", s.root.offset) + } + return nil + } + for b := s.first(); ; b = s.next(b) { + if b.offset%bitsPerBlock != 0 { + return fmt.Errorf("bad offset modulo: %d", b.offset) + } + if b.empty() { + return fmt.Errorf("empty block") + } + if b.prev.next != b { + return fmt.Errorf("bad prev.next link") + } + if b.next.prev != b { + return fmt.Errorf("bad next.prev link") + } + if b.next == &s.root { + break + } + if b.offset >= b.next.offset { + return fmt.Errorf("bad offset order: b.offset=%d, b.next.offset=%d", + b.offset, b.next.offset) + } + } + return nil +} diff --git a/vendor/gonum.org/v1/gonum/AUTHORS b/vendor/gonum.org/v1/gonum/AUTHORS new file mode 100644 index 0000000000..1f0e79b4c0 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/AUTHORS @@ -0,0 +1,141 @@ +# This is the official list of Gonum authors for copyright purposes. +# This file is distinct from the CONTRIBUTORS files. +# See the latter for an explanation. + +# Names should be added to this file as +# Name or Organization +# The email address is not required for organizations. + +# Please keep the list sorted. + +Alexander Egurnov +Andrei Blinnikov +antichris +Bailey Lissington +Bill Gray +Bill Noon +Brendan Tracey +Brent Pedersen +Bulat Khasanov +Chad Kunde +Chan Kwan Yin +Chih-Wei Chang +Chong-Yeol Nah +Chris Tessum +Christophe Meessen +Christopher Waldon +Clayton Northey +Coana ApS +Dan Kortschak +Daniel Fireman +Dario Heinisch +David Kleiven +David Samborski +Davor Kapsa +DeepMind Technologies +Delaney Gillilan +Dezmond Goff +Dirk Müller +Dong-hee Na +Dustin Spicuzza +Egon Elbre +Ekaterina Efimova +Eng Zer Jun +Ethan Burns +Ethan Reesor +Evert Lammerts +Evgeny Savinov +Fabian Wickborn +Facundo Gaich +Fazlul Shahriar +Francesc Campoy +Google Inc +Gustaf Johansson +Hossein Zolfi +Huang Peng Fei +Iakov Davydov +Igor Mikushkin +Iskander Sharipov +Jack Tudbury +Jalem Raj Rohit +James Bell +James Bowman +James Holmes <32bitkid@gmail.com> +Janne Snabb +Jeremy Atkinson +Jes Cok +Jinesi Yelizati +Jonas Kahler +Jonas Schulze +Jonathan Bluett-Duncan +Jonathan J Lawlor +Jonathan Reiter +Jonathan Schroeder +Joost van Amersfoort +Jordan Stoker +Joseph Watson +Josh Wilson +Julien Roland +Kai Trukenmüller +Kendall Marcus +Kent English +Kevin C. 
Zimmerman +Kirill Motkov +Konstantin Shaposhnikov +Leonid Kneller +Lyron Winderbaum +Marco Leogrande +Mark Canning +Mark Skilbeck +Martin Diz +Matthew Connelly +Matthieu Di Mercurio +Max Halford +Maxim Sergeev +Microsoft Corporation +MinJae Kwon +Nathan Edwards +Nick Potts +Nils Wogatzky +Olivier Wulveryck +Or Rikon +Patricio Whittingslow +Patrick DeVivo +Pontus Melke +Renee French +Rishi Desai +Robert Kleffner +Robin Eklind +Roger Welin +Rondall Jones +Sam Zaydel +Samuel Kelemen +Saran Ahluwalia +Scott Holden +Scott Kiesel +Sebastien Binet +Shawn Smith +Sintela Ltd +source{d} +Spencer Lyon +Steve McCoy +Taesu Pyo +Takeshi Yoneda +Tamir Hyman +The University of Adelaide +The University of Minnesota +The University of Washington +Thomas Berg +Tobin Harding +Tom Payne +Tristan Nicholls +Valentin Deleplace +Vincent Thiery +Vladimír Chalupecký +Will Tekulve +Yasuhiro Matsumoto +Yevgeniy Vahlis +Yucheng Zhu +Yunomi +Zhan Shan Mao +Zoe Juozapaitis diff --git a/vendor/gonum.org/v1/gonum/CONTRIBUTORS b/vendor/gonum.org/v1/gonum/CONTRIBUTORS new file mode 100644 index 0000000000..1fbe736c5f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/CONTRIBUTORS @@ -0,0 +1,144 @@ +# This is the official list of people who can contribute +# (and typically have contributed) code to the Gonum +# project. +# +# The AUTHORS file lists the copyright holders; this file +# lists people. For example, Google employees would be listed here +# but not in AUTHORS, because Google would hold the copyright. +# +# When adding J Random Contributor's name to this file, +# either J's name or J's organization's name should be +# added to the AUTHORS file. +# +# Names should be added to this file like so: +# Name +# +# Please keep the list sorted. + +Alexander Egurnov +Andrei Blinnikov +Andrew Brampton +antichris +Bailey Lissington +Bill Gray +Bill Noon +Brendan Tracey +Brent Pedersen +Bulat Khasanov +Chad Kunde +Chan Kwan Yin +Chih-Wei Chang +Chong-Yeol Nah +Chris Tessum +Christophe Meessen +Christopher Waldon +Clayton Northey +Dan Kortschak +Dan Lorenc +Daniel Fireman +Dario Heinisch +David Kleiven +David Samborski +Davor Kapsa +Delaney Gillilan +Dezmond Goff +Dirk Müller +Dong-hee Na +Dustin Spicuzza +Egon Elbre +Ekaterina Efimova +Eng Zer Jun +Ethan Burns +Ethan Reesor +Evert Lammerts +Evgeny Savinov +Fabian Wickborn +Facundo Gaich +Fazlul Shahriar +Francesc Campoy +Gustaf Johansson +Hossein Zolfi +Huang Peng Fei +Iakov Davydov +Igor Mikushkin +Iskander Sharipov +Jack Tudbury +Jalem Raj Rohit +James Bell +James Bowman +James Holmes <32bitkid@gmail.com> +Janne Snabb +Jeremy Atkinson +Jes Cok +Jinesi Yelizati +Jon Richards +Jonas Kahler +Jonas Schulze +Jonathan Bluett-Duncan +Jonathan J Lawlor +Jonathan Reiter +Jonathan Schroeder +Joost van Amersfoort +Jordan Stoker +Joseph Watson +Josh Wilson +Julien Roland +Kai Trukenmüller +Kendall Marcus +Kent English +Kevin C. 
Zimmerman +Kirill Motkov +Konstantin Shaposhnikov +Leonid Kneller +Lyron Winderbaum +Marco Leogrande +Mark Canning +Mark Skilbeck +Martin Diz +Matthew Connelly +Matthieu Di Mercurio +Max Halford +Maxim Sergeev +MinJae Kwon +Nathan Edwards +Nick Potts +Nils Wogatzky +Olivier Wulveryck +Or Rikon +Oskar Haarklou Veileborg +Patricio Whittingslow +Patrick DeVivo +Pontus Melke +Renee French +Rishi Desai +Robert Kleffner +Robin Eklind +Roger Welin +Roman Werpachowski +Rondall Jones +Sam Zaydel +Samuel Kelemen +Saran Ahluwalia +Scott Holden +Scott Kiesel +Sebastien Binet +Shawn Smith +Spencer Lyon +Steve McCoy +Taesu Pyo +Takeshi Yoneda +Tamir Hyman +Thomas Berg +Tobin Harding +Tom Payne +Tristan Nicholls +Valentin Deleplace +Vincent Thiery +Vladimír Chalupecký +Will Tekulve +Yasuhiro Matsumoto +Yevgeniy Vahlis +Yucheng Zhu +Yunomi +Zhan Shan Mao +Zoe Juozapaitis diff --git a/vendor/gonum.org/v1/gonum/LICENSE b/vendor/gonum.org/v1/gonum/LICENSE new file mode 100644 index 0000000000..ed477e59b5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/LICENSE @@ -0,0 +1,23 @@ +Copyright ©2013 The Gonum Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Gonum project nor the names of its authors and + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/vendor/gonum.org/v1/gonum/blas/README.md b/vendor/gonum.org/v1/gonum/blas/README.md new file mode 100644 index 0000000000..16d62bd355 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/README.md @@ -0,0 +1,51 @@ +# Gonum BLAS + +[![go.dev reference](https://pkg.go.dev/badge/gonum.org/v1/gonum/blas)](https://pkg.go.dev/gonum.org/v1/gonum/blas) +[![GoDoc](https://godocs.io/gonum.org/v1/gonum/blas?status.svg)](https://godocs.io/gonum.org/v1/gonum/blas) + +A collection of packages to provide BLAS functionality for the [Go programming +language](http://golang.org) + +## Installation +```sh + go get gonum.org/v1/gonum/blas/... +``` + +## Packages + +### blas + +Defines [BLAS API](http://www.netlib.org/blas/blast-forum/cinterface.pdf) split in several +interfaces. + +### blas/gonum + +Go implementation of the BLAS API (incomplete, implements the `float32` and `float64` API). 
+ +### blas/blas64 and blas/blas32 + +Wrappers for an implementation of the double (i.e., `float64`) and single (`float32`) +precision real parts of the BLAS API. + +```Go +package main + +import ( + "fmt" + + "gonum.org/v1/gonum/blas/blas64" +) + +func main() { + v := blas64.Vector{Inc: 1, Data: []float64{1, 1, 1}} + v.N = len(v.Data) + fmt.Println("v has length:", blas64.Nrm2(v)) +} +``` + +### blas/cblas128 and blas/cblas64 + +Wrappers for an implementation of the double (i.e., `complex128`) and single (`complex64`) +precision complex parts of the blas API. + +Currently blas/cblas64 and blas/cblas128 require gonum.org/v1/netlib/blas. diff --git a/vendor/gonum.org/v1/gonum/blas/blas.go b/vendor/gonum.org/v1/gonum/blas/blas.go new file mode 100644 index 0000000000..9b933e3fc5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/blas.go @@ -0,0 +1,283 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate ./conversions.bash + +package blas + +// Flag constants indicate Givens transformation H matrix state. +type Flag int + +const ( + Identity Flag = -2 // H is the identity matrix; no rotation is needed. + Rescaling Flag = -1 // H specifies rescaling. + OffDiagonal Flag = 0 // Off-diagonal elements of H are non-unit. + Diagonal Flag = 1 // Diagonal elements of H are non-unit. +) + +// SrotmParams contains Givens transformation parameters returned +// by the Float32 Srotm method. +type SrotmParams struct { + Flag + H [4]float32 // Column-major 2 by 2 matrix. +} + +// DrotmParams contains Givens transformation parameters returned +// by the Float64 Drotm method. +type DrotmParams struct { + Flag + H [4]float64 // Column-major 2 by 2 matrix. +} + +// Transpose specifies the transposition operation of a matrix. +type Transpose byte + +const ( + NoTrans Transpose = 'N' + Trans Transpose = 'T' + ConjTrans Transpose = 'C' +) + +// Uplo specifies whether a matrix is upper or lower triangular. +type Uplo byte + +const ( + Upper Uplo = 'U' + Lower Uplo = 'L' + All Uplo = 'A' +) + +// Diag specifies whether a matrix is unit triangular. +type Diag byte + +const ( + NonUnit Diag = 'N' + Unit Diag = 'U' +) + +// Side specifies from which side a multiplication operation is performed. +type Side byte + +const ( + Left Side = 'L' + Right Side = 'R' +) + +// Float32 implements the single precision real BLAS routines. +type Float32 interface { + Float32Level1 + Float32Level2 + Float32Level3 +} + +// Float32Level1 implements the single precision real BLAS Level 1 routines. 
+type Float32Level1 interface { + Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32 + Dsdot(n int, x []float32, incX int, y []float32, incY int) float64 + Sdot(n int, x []float32, incX int, y []float32, incY int) float32 + Snrm2(n int, x []float32, incX int) float32 + Sasum(n int, x []float32, incX int) float32 + Isamax(n int, x []float32, incX int) int + Sswap(n int, x []float32, incX int, y []float32, incY int) + Scopy(n int, x []float32, incX int, y []float32, incY int) + Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int) + Srotg(a, b float32) (c, s, r, z float32) + Srotmg(d1, d2, b1, b2 float32) (p SrotmParams, rd1, rd2, rb1 float32) + Srot(n int, x []float32, incX int, y []float32, incY int, c, s float32) + Srotm(n int, x []float32, incX int, y []float32, incY int, p SrotmParams) + Sscal(n int, alpha float32, x []float32, incX int) +} + +// Float32Level2 implements the single precision real BLAS Level 2 routines. +type Float32Level2 interface { + Sgemv(tA Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) + Sgbmv(tA Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) + Strmv(ul Uplo, tA Transpose, d Diag, n int, a []float32, lda int, x []float32, incX int) + Stbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []float32, lda int, x []float32, incX int) + Stpmv(ul Uplo, tA Transpose, d Diag, n int, ap []float32, x []float32, incX int) + Strsv(ul Uplo, tA Transpose, d Diag, n int, a []float32, lda int, x []float32, incX int) + Stbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []float32, lda int, x []float32, incX int) + Stpsv(ul Uplo, tA Transpose, d Diag, n int, ap []float32, x []float32, incX int) + Ssymv(ul Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) + Ssbmv(ul Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) + Sspmv(ul Uplo, n int, alpha float32, ap []float32, x []float32, incX int, beta float32, y []float32, incY int) + Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) + Ssyr(ul Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) + Sspr(ul Uplo, n int, alpha float32, x []float32, incX int, ap []float32) + Ssyr2(ul Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) + Sspr2(ul Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32) +} + +// Float32Level3 implements the single precision real BLAS Level 3 routines. 
+type Float32Level3 interface { + Sgemm(tA, tB Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) + Ssymm(s Side, ul Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) + Ssyrk(ul Uplo, t Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) + Ssyr2k(ul Uplo, t Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) + Strmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) + Strsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) +} + +// Float64 implements the single precision real BLAS routines. +type Float64 interface { + Float64Level1 + Float64Level2 + Float64Level3 +} + +// Float64Level1 implements the double precision real BLAS Level 1 routines. +type Float64Level1 interface { + Ddot(n int, x []float64, incX int, y []float64, incY int) float64 + Dnrm2(n int, x []float64, incX int) float64 + Dasum(n int, x []float64, incX int) float64 + Idamax(n int, x []float64, incX int) int + Dswap(n int, x []float64, incX int, y []float64, incY int) + Dcopy(n int, x []float64, incX int, y []float64, incY int) + Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int) + Drotg(a, b float64) (c, s, r, z float64) + Drotmg(d1, d2, b1, b2 float64) (p DrotmParams, rd1, rd2, rb1 float64) + Drot(n int, x []float64, incX int, y []float64, incY int, c float64, s float64) + Drotm(n int, x []float64, incX int, y []float64, incY int, p DrotmParams) + Dscal(n int, alpha float64, x []float64, incX int) +} + +// Float64Level2 implements the double precision real BLAS Level 2 routines. 
+type Float64Level2 interface { + Dgemv(tA Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) + Dgbmv(tA Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) + Dtrmv(ul Uplo, tA Transpose, d Diag, n int, a []float64, lda int, x []float64, incX int) + Dtbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []float64, lda int, x []float64, incX int) + Dtpmv(ul Uplo, tA Transpose, d Diag, n int, ap []float64, x []float64, incX int) + Dtrsv(ul Uplo, tA Transpose, d Diag, n int, a []float64, lda int, x []float64, incX int) + Dtbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []float64, lda int, x []float64, incX int) + Dtpsv(ul Uplo, tA Transpose, d Diag, n int, ap []float64, x []float64, incX int) + Dsymv(ul Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) + Dsbmv(ul Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) + Dspmv(ul Uplo, n int, alpha float64, ap []float64, x []float64, incX int, beta float64, y []float64, incY int) + Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) + Dsyr(ul Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int) + Dspr(ul Uplo, n int, alpha float64, x []float64, incX int, ap []float64) + Dsyr2(ul Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) + Dspr2(ul Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64) +} + +// Float64Level3 implements the double precision real BLAS Level 3 routines. +type Float64Level3 interface { + Dgemm(tA, tB Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) + Dsymm(s Side, ul Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) + Dsyrk(ul Uplo, t Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) + Dsyr2k(ul Uplo, t Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) + Dtrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) + Dtrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) +} + +// Complex64 implements the single precision complex BLAS routines. +type Complex64 interface { + Complex64Level1 + Complex64Level2 + Complex64Level3 +} + +// Complex64Level1 implements the single precision complex BLAS Level 1 routines. +type Complex64Level1 interface { + Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64) + Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64) + Scnrm2(n int, x []complex64, incX int) float32 + Scasum(n int, x []complex64, incX int) float32 + Icamax(n int, x []complex64, incX int) int + Cswap(n int, x []complex64, incX int, y []complex64, incY int) + Ccopy(n int, x []complex64, incX int, y []complex64, incY int) + Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int) + Cscal(n int, alpha complex64, x []complex64, incX int) + Csscal(n int, alpha float32, x []complex64, incX int) +} + +// Complex64Level2 implements the single precision complex BLAS routines Level 2 routines. 
+type Complex64Level2 interface { + Cgemv(tA Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) + Cgbmv(tA Transpose, m, n, kL, kU int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) + Ctrmv(ul Uplo, tA Transpose, d Diag, n int, a []complex64, lda int, x []complex64, incX int) + Ctbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex64, lda int, x []complex64, incX int) + Ctpmv(ul Uplo, tA Transpose, d Diag, n int, ap []complex64, x []complex64, incX int) + Ctrsv(ul Uplo, tA Transpose, d Diag, n int, a []complex64, lda int, x []complex64, incX int) + Ctbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex64, lda int, x []complex64, incX int) + Ctpsv(ul Uplo, tA Transpose, d Diag, n int, ap []complex64, x []complex64, incX int) + Chemv(ul Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) + Chbmv(ul Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) + Chpmv(ul Uplo, n int, alpha complex64, ap []complex64, x []complex64, incX int, beta complex64, y []complex64, incY int) + Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) + Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) + Cher(ul Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int) + Chpr(ul Uplo, n int, alpha float32, x []complex64, incX int, a []complex64) + Cher2(ul Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) + Chpr2(ul Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ap []complex64) +} + +// Complex64Level3 implements the single precision complex BLAS Level 3 routines. +type Complex64Level3 interface { + Cgemm(tA, tB Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) + Csymm(s Side, ul Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) + Csyrk(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) + Csyr2k(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) + Ctrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) + Ctrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) + Chemm(s Side, ul Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) + Cherk(ul Uplo, t Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) + Cher2k(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) +} + +// Complex128 implements the double precision complex BLAS routines. +type Complex128 interface { + Complex128Level1 + Complex128Level2 + Complex128Level3 +} + +// Complex128Level1 implements the double precision complex BLAS Level 1 routines. 
+type Complex128Level1 interface { + Zdotu(n int, x []complex128, incX int, y []complex128, incY int) (dotu complex128) + Zdotc(n int, x []complex128, incX int, y []complex128, incY int) (dotc complex128) + Dznrm2(n int, x []complex128, incX int) float64 + Dzasum(n int, x []complex128, incX int) float64 + Izamax(n int, x []complex128, incX int) int + Zswap(n int, x []complex128, incX int, y []complex128, incY int) + Zcopy(n int, x []complex128, incX int, y []complex128, incY int) + Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int) + Zscal(n int, alpha complex128, x []complex128, incX int) + Zdscal(n int, alpha float64, x []complex128, incX int) +} + +// Complex128Level2 implements the double precision complex BLAS Level 2 routines. +type Complex128Level2 interface { + Zgemv(tA Transpose, m, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) + Zgbmv(tA Transpose, m, n int, kL int, kU int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) + Ztrmv(ul Uplo, tA Transpose, d Diag, n int, a []complex128, lda int, x []complex128, incX int) + Ztbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex128, lda int, x []complex128, incX int) + Ztpmv(ul Uplo, tA Transpose, d Diag, n int, ap []complex128, x []complex128, incX int) + Ztrsv(ul Uplo, tA Transpose, d Diag, n int, a []complex128, lda int, x []complex128, incX int) + Ztbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex128, lda int, x []complex128, incX int) + Ztpsv(ul Uplo, tA Transpose, d Diag, n int, ap []complex128, x []complex128, incX int) + Zhemv(ul Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) + Zhbmv(ul Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) + Zhpmv(ul Uplo, n int, alpha complex128, ap []complex128, x []complex128, incX int, beta complex128, y []complex128, incY int) + Zgeru(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) + Zgerc(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) + Zher(ul Uplo, n int, alpha float64, x []complex128, incX int, a []complex128, lda int) + Zhpr(ul Uplo, n int, alpha float64, x []complex128, incX int, a []complex128) + Zher2(ul Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) + Zhpr2(ul Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, ap []complex128) +} + +// Complex128Level3 implements the double precision complex BLAS Level 3 routines. 
+type Complex128Level3 interface { + Zgemm(tA, tB Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) + Zsymm(s Side, ul Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) + Zsyrk(ul Uplo, t Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) + Zsyr2k(ul Uplo, t Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) + Ztrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) + Ztrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) + Zhemm(s Side, ul Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) + Zherk(ul Uplo, t Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) + Zher2k(ul Uplo, t Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) +} diff --git a/vendor/gonum.org/v1/gonum/blas/blas64/blas64.go b/vendor/gonum.org/v1/gonum/blas/blas64/blas64.go new file mode 100644 index 0000000000..64ac985c1c --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/blas64/blas64.go @@ -0,0 +1,533 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package blas64 + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/gonum" +) + +var blas64 blas.Float64 = gonum.Implementation{} + +// Use sets the BLAS float64 implementation to be used by subsequent BLAS calls. +// The default implementation is +// gonum.org/v1/gonum/blas/gonum.Implementation. +func Use(b blas.Float64) { + blas64 = b +} + +// Implementation returns the current BLAS float64 implementation. +// +// Implementation allows direct calls to the current BLAS float64 implementation +// giving finer control of parameters. +func Implementation() blas.Float64 { + return blas64 +} + +// Vector represents a vector with an associated element increment. +type Vector struct { + N int + Data []float64 + Inc int +} + +// General represents a matrix using the conventional storage scheme. +type General struct { + Rows, Cols int + Data []float64 + Stride int +} + +// Band represents a band matrix using the band storage scheme. +type Band struct { + Rows, Cols int + KL, KU int + Data []float64 + Stride int +} + +// Triangular represents a triangular matrix using the conventional storage scheme. +type Triangular struct { + Uplo blas.Uplo + Diag blas.Diag + N int + Data []float64 + Stride int +} + +// TriangularBand represents a triangular matrix using the band storage scheme. +type TriangularBand struct { + Uplo blas.Uplo + Diag blas.Diag + N, K int + Data []float64 + Stride int +} + +// TriangularPacked represents a triangular matrix using the packed storage scheme. +type TriangularPacked struct { + Uplo blas.Uplo + Diag blas.Diag + N int + Data []float64 +} + +// Symmetric represents a symmetric matrix using the conventional storage scheme. +type Symmetric struct { + Uplo blas.Uplo + N int + Data []float64 + Stride int +} + +// SymmetricBand represents a symmetric matrix using the band storage scheme. 
+type SymmetricBand struct {
+ Uplo blas.Uplo
+ N, K int
+ Data []float64
+ Stride int
+}
+
+// SymmetricPacked represents a symmetric matrix using the packed storage scheme.
+type SymmetricPacked struct {
+ Uplo blas.Uplo
+ N int
+ Data []float64
+}
+
+// Level 1
+
+const (
+ negInc = "blas64: negative vector increment"
+ badLength = "blas64: vector length mismatch"
+)
+
+// Dot computes the dot product of the two vectors:
+//
+// \sum_i x[i]*y[i].
+//
+// Dot will panic if the lengths of x and y do not match.
+func Dot(x, y Vector) float64 {
+ if x.N != y.N {
+ panic(badLength)
+ }
+ return blas64.Ddot(x.N, x.Data, x.Inc, y.Data, y.Inc)
+}
+
+// Nrm2 computes the Euclidean norm of the vector x:
+//
+// sqrt(\sum_i x[i]*x[i]).
+//
+// Nrm2 will panic if the vector increment is negative.
+func Nrm2(x Vector) float64 {
+ if x.Inc < 0 {
+ panic(negInc)
+ }
+ return blas64.Dnrm2(x.N, x.Data, x.Inc)
+}
+
+// Asum computes the sum of the absolute values of the elements of x:
+//
+// \sum_i |x[i]|.
+//
+// Asum will panic if the vector increment is negative.
+func Asum(x Vector) float64 {
+ if x.Inc < 0 {
+ panic(negInc)
+ }
+ return blas64.Dasum(x.N, x.Data, x.Inc)
+}
+
+// Iamax returns the index of an element of x with the largest absolute value.
+// If there are multiple such indices the earliest is returned.
+// Iamax returns -1 if n == 0.
+//
+// Iamax will panic if the vector increment is negative.
+func Iamax(x Vector) int {
+ if x.Inc < 0 {
+ panic(negInc)
+ }
+ return blas64.Idamax(x.N, x.Data, x.Inc)
+}
+
+// Swap exchanges the elements of the two vectors:
+//
+// x[i], y[i] = y[i], x[i] for all i.
+//
+// Swap will panic if the lengths of x and y do not match.
+func Swap(x, y Vector) {
+ if x.N != y.N {
+ panic(badLength)
+ }
+ blas64.Dswap(x.N, x.Data, x.Inc, y.Data, y.Inc)
+}
+
+// Copy copies the elements of x into the elements of y:
+//
+// y[i] = x[i] for all i.
+//
+// Copy will panic if the lengths of x and y do not match.
+func Copy(x, y Vector) {
+ if x.N != y.N {
+ panic(badLength)
+ }
+ blas64.Dcopy(x.N, x.Data, x.Inc, y.Data, y.Inc)
+}
+
+// Axpy adds x scaled by alpha to y:
+//
+// y[i] += alpha*x[i] for all i.
+//
+// Axpy will panic if the lengths of x and y do not match.
+func Axpy(alpha float64, x, y Vector) {
+ if x.N != y.N {
+ panic(badLength)
+ }
+ blas64.Daxpy(x.N, alpha, x.Data, x.Inc, y.Data, y.Inc)
+}
+
+// Rotg computes the parameters of a Givens plane rotation so that
+//
+// ⎡ c s⎤ ⎡a⎤ ⎡r⎤
+// ⎣-s c⎦ * ⎣b⎦ = ⎣0⎦
+//
+// where a and b are the Cartesian coordinates of a given point.
+// c, s, and r are defined as
+//
+// r = ±Sqrt(a^2 + b^2),
+// c = a/r, the cosine of the rotation angle,
+// s = b/r, the sine of the rotation angle,
+//
+// and z is defined such that
+//
+// if |a| > |b|, z = s,
+// otherwise if c != 0, z = 1/c,
+// otherwise z = 1.
+func Rotg(a, b float64) (c, s, r, z float64) {
+ return blas64.Drotg(a, b)
+}
+
+// Rotmg computes the modified Givens rotation. See
+// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html
+// for more details.
+func Rotmg(d1, d2, b1, b2 float64) (p blas.DrotmParams, rd1, rd2, rb1 float64) {
+ return blas64.Drotmg(d1, d2, b1, b2)
+}
+
+// Rot applies a plane transformation to n points represented by the vectors x
+// and y:
+//
+// x[i] = c*x[i] + s*y[i],
+// y[i] = -s*x[i] + c*y[i], for all i.
+func Rot(x, y Vector, c, s float64) { + if x.N != y.N { + panic(badLength) + } + blas64.Drot(x.N, x.Data, x.Inc, y.Data, y.Inc, c, s) +} + +// Rotm applies the modified Givens rotation to n points represented by the +// vectors x and y. +func Rotm(x, y Vector, p blas.DrotmParams) { + if x.N != y.N { + panic(badLength) + } + blas64.Drotm(x.N, x.Data, x.Inc, y.Data, y.Inc, p) +} + +// Scal scales the vector x by alpha: +// +// x[i] *= alpha for all i. +// +// Scal will panic if the vector increment is negative. +func Scal(alpha float64, x Vector) { + if x.Inc < 0 { + panic(negInc) + } + blas64.Dscal(x.N, alpha, x.Data, x.Inc) +} + +// Level 2 + +// Gemv computes +// +// y = alpha * A * x + beta * y if t == blas.NoTrans, +// y = alpha * Aᵀ * x + beta * y if t == blas.Trans or blas.ConjTrans, +// +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. +func Gemv(t blas.Transpose, alpha float64, a General, x Vector, beta float64, y Vector) { + blas64.Dgemv(t, a.Rows, a.Cols, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Gbmv computes +// +// y = alpha * A * x + beta * y if t == blas.NoTrans, +// y = alpha * Aᵀ * x + beta * y if t == blas.Trans or blas.ConjTrans, +// +// where A is an m×n band matrix, x and y are vectors, and alpha and beta are scalars. +func Gbmv(t blas.Transpose, alpha float64, a Band, x Vector, beta float64, y Vector) { + blas64.Dgbmv(t, a.Rows, a.Cols, a.KL, a.KU, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Trmv computes +// +// x = A * x if t == blas.NoTrans, +// x = Aᵀ * x if t == blas.Trans or blas.ConjTrans, +// +// where A is an n×n triangular matrix, and x is a vector. +func Trmv(t blas.Transpose, a Triangular, x Vector) { + blas64.Dtrmv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tbmv computes +// +// x = A * x if t == blas.NoTrans, +// x = Aᵀ * x if t == blas.Trans or blas.ConjTrans, +// +// where A is an n×n triangular band matrix, and x is a vector. +func Tbmv(t blas.Transpose, a TriangularBand, x Vector) { + blas64.Dtbmv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tpmv computes +// +// x = A * x if t == blas.NoTrans, +// x = Aᵀ * x if t == blas.Trans or blas.ConjTrans, +// +// where A is an n×n triangular matrix in packed format, and x is a vector. +func Tpmv(t blas.Transpose, a TriangularPacked, x Vector) { + blas64.Dtpmv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc) +} + +// Trsv solves +// +// A * x = b if t == blas.NoTrans, +// Aᵀ * x = b if t == blas.Trans or blas.ConjTrans, +// +// where A is an n×n triangular matrix, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Trsv(t blas.Transpose, a Triangular, x Vector) { + blas64.Dtrsv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tbsv solves +// +// A * x = b if t == blas.NoTrans, +// Aᵀ * x = b if t == blas.Trans or blas.ConjTrans, +// +// where A is an n×n triangular band matrix, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. 
+func Tbsv(t blas.Transpose, a TriangularBand, x Vector) { + blas64.Dtbsv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tpsv solves +// +// A * x = b if t == blas.NoTrans, +// Aᵀ * x = b if t == blas.Trans or blas.ConjTrans, +// +// where A is an n×n triangular matrix in packed format, and x and b are +// vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Tpsv(t blas.Transpose, a TriangularPacked, x Vector) { + blas64.Dtpsv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc) +} + +// Symv computes +// +// y = alpha * A * x + beta * y, +// +// where A is an n×n symmetric matrix, x and y are vectors, and alpha and +// beta are scalars. +func Symv(alpha float64, a Symmetric, x Vector, beta float64, y Vector) { + blas64.Dsymv(a.Uplo, a.N, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Sbmv performs +// +// y = alpha * A * x + beta * y, +// +// where A is an n×n symmetric band matrix, x and y are vectors, and alpha +// and beta are scalars. +func Sbmv(alpha float64, a SymmetricBand, x Vector, beta float64, y Vector) { + blas64.Dsbmv(a.Uplo, a.N, a.K, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Spmv performs +// +// y = alpha * A * x + beta * y, +// +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha and beta are scalars. +func Spmv(alpha float64, a SymmetricPacked, x Vector, beta float64, y Vector) { + blas64.Dspmv(a.Uplo, a.N, alpha, a.Data, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Ger performs a rank-1 update +// +// A += alpha * x * yᵀ, +// +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +func Ger(alpha float64, x, y Vector, a General) { + blas64.Dger(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride) +} + +// Syr performs a rank-1 update +// +// A += alpha * x * xᵀ, +// +// where A is an n×n symmetric matrix, x is a vector, and alpha is a scalar. +func Syr(alpha float64, x Vector, a Symmetric) { + blas64.Dsyr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data, a.Stride) +} + +// Spr performs the rank-1 update +// +// A += alpha * x * xᵀ, +// +// where A is an n×n symmetric matrix in packed format, x is a vector, and +// alpha is a scalar. +func Spr(alpha float64, x Vector, a SymmetricPacked) { + blas64.Dspr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data) +} + +// Syr2 performs a rank-2 update +// +// A += alpha * x * yᵀ + alpha * y * xᵀ, +// +// where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar. +func Syr2(alpha float64, x, y Vector, a Symmetric) { + blas64.Dsyr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride) +} + +// Spr2 performs a rank-2 update +// +// A += alpha * x * yᵀ + alpha * y * xᵀ, +// +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha is a scalar. +func Spr2(alpha float64, x, y Vector, a SymmetricPacked) { + blas64.Dspr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data) +} + +// Level 3 + +// Gemm computes +// +// C = alpha * A * B + beta * C, +// +// where A, B, and C are dense matrices, and alpha and beta are scalars. +// tA and tB specify whether A or B are transposed. 
+func Gemm(tA, tB blas.Transpose, alpha float64, a, b General, beta float64, c General) { + var m, n, k int + if tA == blas.NoTrans { + m, k = a.Rows, a.Cols + } else { + m, k = a.Cols, a.Rows + } + if tB == blas.NoTrans { + n = b.Cols + } else { + n = b.Rows + } + blas64.Dgemm(tA, tB, m, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Symm performs +// +// C = alpha * A * B + beta * C if s == blas.Left, +// C = alpha * B * A + beta * C if s == blas.Right, +// +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and +// alpha is a scalar. +func Symm(s blas.Side, alpha float64, a Symmetric, b General, beta float64, c General) { + var m, n int + if s == blas.Left { + m, n = a.N, b.Cols + } else { + m, n = b.Rows, a.N + } + blas64.Dsymm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Syrk performs a symmetric rank-k update +// +// C = alpha * A * Aᵀ + beta * C if t == blas.NoTrans, +// C = alpha * Aᵀ * A + beta * C if t == blas.Trans or blas.ConjTrans, +// +// where C is an n×n symmetric matrix, A is an n×k matrix if t == blas.NoTrans and +// a k×n matrix otherwise, and alpha and beta are scalars. +func Syrk(t blas.Transpose, alpha float64, a General, beta float64, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + blas64.Dsyrk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride) +} + +// Syr2k performs a symmetric rank-2k update +// +// C = alpha * A * Bᵀ + alpha * B * Aᵀ + beta * C if t == blas.NoTrans, +// C = alpha * Aᵀ * B + alpha * Bᵀ * A + beta * C if t == blas.Trans or blas.ConjTrans, +// +// where C is an n×n symmetric matrix, A and B are n×k matrices if t == NoTrans +// and k×n matrices otherwise, and alpha and beta are scalars. +func Syr2k(t blas.Transpose, alpha float64, a, b General, beta float64, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + blas64.Dsyr2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Trmm performs +// +// B = alpha * A * B if tA == blas.NoTrans and s == blas.Left, +// B = alpha * Aᵀ * B if tA == blas.Trans or blas.ConjTrans, and s == blas.Left, +// B = alpha * B * A if tA == blas.NoTrans and s == blas.Right, +// B = alpha * B * Aᵀ if tA == blas.Trans or blas.ConjTrans, and s == blas.Right, +// +// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is +// a scalar. +func Trmm(s blas.Side, tA blas.Transpose, alpha float64, a Triangular, b General) { + blas64.Dtrmm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} + +// Trsm solves +// +// A * X = alpha * B if tA == blas.NoTrans and s == blas.Left, +// Aᵀ * X = alpha * B if tA == blas.Trans or blas.ConjTrans, and s == blas.Left, +// X * A = alpha * B if tA == blas.NoTrans and s == blas.Right, +// X * Aᵀ = alpha * B if tA == blas.Trans or blas.ConjTrans, and s == blas.Right, +// +// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and +// alpha is a scalar. +// +// At entry to the function, X contains the values of B, and the result is +// stored in-place into X. +// +// No check is made that A is invertible. 
+func Trsm(s blas.Side, tA blas.Transpose, alpha float64, a Triangular, b General) { + blas64.Dtrsm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} diff --git a/vendor/gonum.org/v1/gonum/blas/blas64/conv.go b/vendor/gonum.org/v1/gonum/blas/blas64/conv.go new file mode 100644 index 0000000000..695557d13a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/blas64/conv.go @@ -0,0 +1,263 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package blas64 + +import "gonum.org/v1/gonum/blas" + +// GeneralCols represents a matrix using the conventional column-major storage scheme. +type GeneralCols General + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions as a and have adequate backing +// data storage. +func (t GeneralCols) From(a General) { + if t.Rows != a.Rows || t.Cols != a.Cols { + panic("blas64: mismatched dimension") + } + if len(t.Data) < (t.Cols-1)*t.Stride+t.Rows { + panic("blas64: short data slice") + } + for i := 0; i < a.Rows; i++ { + for j, v := range a.Data[i*a.Stride : i*a.Stride+a.Cols] { + t.Data[i+j*t.Stride] = v + } + } +} + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions as a and have adequate backing +// data storage. +func (t General) From(a GeneralCols) { + if t.Rows != a.Rows || t.Cols != a.Cols { + panic("blas64: mismatched dimension") + } + if len(t.Data) < (t.Rows-1)*t.Stride+t.Cols { + panic("blas64: short data slice") + } + for j := 0; j < a.Cols; j++ { + for i, v := range a.Data[j*a.Stride : j*a.Stride+a.Rows] { + t.Data[i*t.Stride+j] = v + } + } +} + +// TriangularCols represents a matrix using the conventional column-major storage scheme. +type TriangularCols Triangular + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions, uplo and diag as a and have +// adequate backing data storage. +func (t TriangularCols) From(a Triangular) { + if t.N != a.N { + panic("blas64: mismatched dimension") + } + if t.Uplo != a.Uplo { + panic("blas64: mismatched BLAS uplo") + } + if t.Diag != a.Diag { + panic("blas64: mismatched BLAS diag") + } + switch a.Uplo { + default: + panic("blas64: bad BLAS uplo") + case blas.Upper: + for i := 0; i < a.N; i++ { + for j := i; j < a.N; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + case blas.Lower: + for i := 0; i < a.N; i++ { + for j := 0; j <= i; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + case blas.All: + for i := 0; i < a.N; i++ { + for j := 0; j < a.N; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + } +} + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions, uplo and diag as a and have +// adequate backing data storage. 
+func (t Triangular) From(a TriangularCols) {
+ if t.N != a.N {
+ panic("blas64: mismatched dimension")
+ }
+ if t.Uplo != a.Uplo {
+ panic("blas64: mismatched BLAS uplo")
+ }
+ if t.Diag != a.Diag {
+ panic("blas64: mismatched BLAS diag")
+ }
+ switch a.Uplo {
+ default:
+ panic("blas64: bad BLAS uplo")
+ case blas.Upper:
+ for i := 0; i < a.N; i++ {
+ for j := i; j < a.N; j++ {
+ t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
+ }
+ }
+ case blas.Lower:
+ for i := 0; i < a.N; i++ {
+ for j := 0; j <= i; j++ {
+ t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
+ }
+ }
+ case blas.All:
+ for i := 0; i < a.N; i++ {
+ for j := 0; j < a.N; j++ {
+ t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
+ }
+ }
+ }
+}
+
+// BandCols represents a matrix using the band column-major storage scheme.
+type BandCols Band
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions and bandwidth as a and have
+// adequate backing data storage.
+func (t BandCols) From(a Band) {
+ if t.Rows != a.Rows || t.Cols != a.Cols {
+ panic("blas64: mismatched dimension")
+ }
+ if t.KL != a.KL || t.KU != a.KU {
+ panic("blas64: mismatched bandwidth")
+ }
+ if a.Stride < a.KL+a.KU+1 {
+ panic("blas64: short stride for source")
+ }
+ if t.Stride < t.KL+t.KU+1 {
+ panic("blas64: short stride for destination")
+ }
+ for i := 0; i < a.Rows; i++ {
+ for j := max(0, i-a.KL); j < min(i+a.KU+1, a.Cols); j++ {
+ t.Data[i+t.KU-j+j*t.Stride] = a.Data[j+a.KL-i+i*a.Stride]
+ }
+ }
+}
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions and bandwidth as a and have
+// adequate backing data storage.
+func (t Band) From(a BandCols) {
+ if t.Rows != a.Rows || t.Cols != a.Cols {
+ panic("blas64: mismatched dimension")
+ }
+ if t.KL != a.KL || t.KU != a.KU {
+ panic("blas64: mismatched bandwidth")
+ }
+ if a.Stride < a.KL+a.KU+1 {
+ panic("blas64: short stride for source")
+ }
+ if t.Stride < t.KL+t.KU+1 {
+ panic("blas64: short stride for destination")
+ }
+ for j := 0; j < a.Cols; j++ {
+ for i := max(0, j-a.KU); i < min(j+a.KL+1, a.Rows); i++ {
+ t.Data[j+t.KL-i+i*t.Stride] = a.Data[i+a.KU-j+j*a.Stride]
+ }
+ }
+}
+
+// TriangularBandCols represents a triangular matrix using the band column-major storage scheme.
+type TriangularBandCols TriangularBand
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions, bandwidth and uplo as a and
+// have adequate backing data storage.
+func (t TriangularBandCols) From(a TriangularBand) {
+ if t.N != a.N {
+ panic("blas64: mismatched dimension")
+ }
+ if t.K != a.K {
+ panic("blas64: mismatched bandwidth")
+ }
+ if a.Stride < a.K+1 {
+ panic("blas64: short stride for source")
+ }
+ if t.Stride < t.K+1 {
+ panic("blas64: short stride for destination")
+ }
+ if t.Uplo != a.Uplo {
+ panic("blas64: mismatched BLAS uplo")
+ }
+ if t.Diag != a.Diag {
+ panic("blas64: mismatched BLAS diag")
+ }
+ dst := BandCols{
+ Rows: t.N, Cols: t.N,
+ Stride: t.Stride,
+ Data: t.Data,
+ }
+ src := Band{
+ Rows: a.N, Cols: a.N,
+ Stride: a.Stride,
+ Data: a.Data,
+ }
+ switch a.Uplo {
+ default:
+ panic("blas64: bad BLAS uplo")
+ case blas.Upper:
+ dst.KU = t.K
+ src.KU = a.K
+ case blas.Lower:
+ dst.KL = t.K
+ src.KL = a.K
+ }
+ dst.From(src)
+}
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions, bandwidth and uplo as a and
+// have adequate backing data storage.
+func (t TriangularBand) From(a TriangularBandCols) { + if t.N != a.N { + panic("blas64: mismatched dimension") + } + if t.K != a.K { + panic("blas64: mismatched bandwidth") + } + if a.Stride < a.K+1 { + panic("blas64: short stride for source") + } + if t.Stride < t.K+1 { + panic("blas64: short stride for destination") + } + if t.Uplo != a.Uplo { + panic("blas64: mismatched BLAS uplo") + } + if t.Diag != a.Diag { + panic("blas64: mismatched BLAS diag") + } + dst := Band{ + Rows: t.N, Cols: t.N, + Stride: t.Stride, + Data: t.Data, + } + src := BandCols{ + Rows: a.N, Cols: a.N, + Stride: a.Stride, + Data: a.Data, + } + switch a.Uplo { + default: + panic("blas64: bad BLAS uplo") + case blas.Upper: + dst.KU = t.K + src.KU = a.K + case blas.Lower: + dst.KL = t.K + src.KL = a.K + } + dst.From(src) +} diff --git a/vendor/gonum.org/v1/gonum/blas/blas64/conv_symmetric.go b/vendor/gonum.org/v1/gonum/blas/blas64/conv_symmetric.go new file mode 100644 index 0000000000..5146f1a1c3 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/blas64/conv_symmetric.go @@ -0,0 +1,153 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package blas64 + +import "gonum.org/v1/gonum/blas" + +// SymmetricCols represents a matrix using the conventional column-major storage scheme. +type SymmetricCols Symmetric + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions and uplo as a and have adequate +// backing data storage. +func (t SymmetricCols) From(a Symmetric) { + if t.N != a.N { + panic("blas64: mismatched dimension") + } + if t.Uplo != a.Uplo { + panic("blas64: mismatched BLAS uplo") + } + switch a.Uplo { + default: + panic("blas64: bad BLAS uplo") + case blas.Upper: + for i := 0; i < a.N; i++ { + for j := i; j < a.N; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + case blas.Lower: + for i := 0; i < a.N; i++ { + for j := 0; j <= i; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + } +} + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions and uplo as a and have adequate +// backing data storage. +func (t Symmetric) From(a SymmetricCols) { + if t.N != a.N { + panic("blas64: mismatched dimension") + } + if t.Uplo != a.Uplo { + panic("blas64: mismatched BLAS uplo") + } + switch a.Uplo { + default: + panic("blas64: bad BLAS uplo") + case blas.Upper: + for i := 0; i < a.N; i++ { + for j := i; j < a.N; j++ { + t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride] + } + } + case blas.Lower: + for i := 0; i < a.N; i++ { + for j := 0; j <= i; j++ { + t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride] + } + } + } +} + +// SymmetricBandCols represents a symmetric matrix using the band column-major storage scheme. +type SymmetricBandCols SymmetricBand + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions, bandwidth and uplo as a and +// have adequate backing data storage. 
+func (t SymmetricBandCols) From(a SymmetricBand) {
+ if t.N != a.N {
+ panic("blas64: mismatched dimension")
+ }
+ if t.K != a.K {
+ panic("blas64: mismatched bandwidth")
+ }
+ if a.Stride < a.K+1 {
+ panic("blas64: short stride for source")
+ }
+ if t.Stride < t.K+1 {
+ panic("blas64: short stride for destination")
+ }
+ if t.Uplo != a.Uplo {
+ panic("blas64: mismatched BLAS uplo")
+ }
+ dst := BandCols{
+ Rows: t.N, Cols: t.N,
+ Stride: t.Stride,
+ Data: t.Data,
+ }
+ src := Band{
+ Rows: a.N, Cols: a.N,
+ Stride: a.Stride,
+ Data: a.Data,
+ }
+ switch a.Uplo {
+ default:
+ panic("blas64: bad BLAS uplo")
+ case blas.Upper:
+ dst.KU = t.K
+ src.KU = a.K
+ case blas.Lower:
+ dst.KL = t.K
+ src.KL = a.K
+ }
+ dst.From(src)
+}
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions, bandwidth and uplo as a and
+// have adequate backing data storage.
+func (t SymmetricBand) From(a SymmetricBandCols) {
+ if t.N != a.N {
+ panic("blas64: mismatched dimension")
+ }
+ if t.K != a.K {
+ panic("blas64: mismatched bandwidth")
+ }
+ if a.Stride < a.K+1 {
+ panic("blas64: short stride for source")
+ }
+ if t.Stride < t.K+1 {
+ panic("blas64: short stride for destination")
+ }
+ if t.Uplo != a.Uplo {
+ panic("blas64: mismatched BLAS uplo")
+ }
+ dst := Band{
+ Rows: t.N, Cols: t.N,
+ Stride: t.Stride,
+ Data: t.Data,
+ }
+ src := BandCols{
+ Rows: a.N, Cols: a.N,
+ Stride: a.Stride,
+ Data: a.Data,
+ }
+ switch a.Uplo {
+ default:
+ panic("blas64: bad BLAS uplo")
+ case blas.Upper:
+ dst.KU = t.K
+ src.KU = a.K
+ case blas.Lower:
+ dst.KL = t.K
+ src.KL = a.K
+ }
+ dst.From(src)
+}
diff --git a/vendor/gonum.org/v1/gonum/blas/blas64/doc.go b/vendor/gonum.org/v1/gonum/blas/blas64/doc.go
new file mode 100644
index 0000000000..7410cee486
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/blas/blas64/doc.go
@@ -0,0 +1,6 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package blas64 provides a simple interface to the float64 BLAS API.
+package blas64 // import "gonum.org/v1/gonum/blas/blas64"
diff --git a/vendor/gonum.org/v1/gonum/blas/cblas128/cblas128.go b/vendor/gonum.org/v1/gonum/blas/cblas128/cblas128.go
new file mode 100644
index 0000000000..82a6f22e2b
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/blas/cblas128/cblas128.go
@@ -0,0 +1,600 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cblas128
+
+import (
+ "gonum.org/v1/gonum/blas"
+ "gonum.org/v1/gonum/blas/gonum"
+)
+
+var cblas128 blas.Complex128 = gonum.Implementation{}
+
+// Use sets the BLAS complex128 implementation to be used by subsequent BLAS calls.
+// The default implementation is
+// gonum.org/v1/gonum/blas/gonum.Implementation.
+func Use(b blas.Complex128) {
+ cblas128 = b
+}
+
+// Implementation returns the current BLAS complex128 implementation.
+//
+// Implementation allows direct calls to the current BLAS complex128 implementation
+// giving finer control of parameters.
+func Implementation() blas.Complex128 {
+ return cblas128
+}
+
+// Vector represents a vector with an associated element increment.
+type Vector struct {
+ N int
+ Inc int
+ Data []complex128
+}
+
+// General represents a matrix using the conventional storage scheme.
+type General struct { + Rows, Cols int + Stride int + Data []complex128 +} + +// Band represents a band matrix using the band storage scheme. +type Band struct { + Rows, Cols int + KL, KU int + Stride int + Data []complex128 +} + +// Triangular represents a triangular matrix using the conventional storage scheme. +type Triangular struct { + N int + Stride int + Data []complex128 + Uplo blas.Uplo + Diag blas.Diag +} + +// TriangularBand represents a triangular matrix using the band storage scheme. +type TriangularBand struct { + N, K int + Stride int + Data []complex128 + Uplo blas.Uplo + Diag blas.Diag +} + +// TriangularPacked represents a triangular matrix using the packed storage scheme. +type TriangularPacked struct { + N int + Data []complex128 + Uplo blas.Uplo + Diag blas.Diag +} + +// Symmetric represents a symmetric matrix using the conventional storage scheme. +type Symmetric struct { + N int + Stride int + Data []complex128 + Uplo blas.Uplo +} + +// SymmetricBand represents a symmetric matrix using the band storage scheme. +type SymmetricBand struct { + N, K int + Stride int + Data []complex128 + Uplo blas.Uplo +} + +// SymmetricPacked represents a symmetric matrix using the packed storage scheme. +type SymmetricPacked struct { + N int + Data []complex128 + Uplo blas.Uplo +} + +// Hermitian represents an Hermitian matrix using the conventional storage scheme. +type Hermitian Symmetric + +// HermitianBand represents an Hermitian matrix using the band storage scheme. +type HermitianBand SymmetricBand + +// HermitianPacked represents an Hermitian matrix using the packed storage scheme. +type HermitianPacked SymmetricPacked + +// Level 1 + +const ( + negInc = "cblas128: negative vector increment" + badLength = "cblas128: vector length mismatch" +) + +// Dotu computes the dot product of the two vectors without +// complex conjugation: +// +// xᵀ * y. +// +// Dotu will panic if the lengths of x and y do not match. +func Dotu(x, y Vector) complex128 { + if x.N != y.N { + panic(badLength) + } + return cblas128.Zdotu(x.N, x.Data, x.Inc, y.Data, y.Inc) +} + +// Dotc computes the dot product of the two vectors with +// complex conjugation: +// +// xᴴ * y. +// +// Dotc will panic if the lengths of x and y do not match. +func Dotc(x, y Vector) complex128 { + if x.N != y.N { + panic(badLength) + } + return cblas128.Zdotc(x.N, x.Data, x.Inc, y.Data, y.Inc) +} + +// Nrm2 computes the Euclidean norm of the vector x: +// +// sqrt(\sum_i x[i] * x[i]). +// +// Nrm2 will panic if the vector increment is negative. +func Nrm2(x Vector) float64 { + if x.Inc < 0 { + panic(negInc) + } + return cblas128.Dznrm2(x.N, x.Data, x.Inc) +} + +// Asum computes the sum of magnitudes of the real and imaginary parts of +// elements of the vector x: +// +// \sum_i (|Re x[i]| + |Im x[i]|). +// +// Asum will panic if the vector increment is negative. +func Asum(x Vector) float64 { + if x.Inc < 0 { + panic(negInc) + } + return cblas128.Dzasum(x.N, x.Data, x.Inc) +} + +// Iamax returns the index of an element of x with the largest sum of +// magnitudes of the real and imaginary parts (|Re x[i]|+|Im x[i]|). +// If there are multiple such indices, the earliest is returned. +// +// Iamax returns -1 if n == 0. +// +// Iamax will panic if the vector increment is negative. +func Iamax(x Vector) int { + if x.Inc < 0 { + panic(negInc) + } + return cblas128.Izamax(x.N, x.Data, x.Inc) +} + +// Swap exchanges the elements of two vectors: +// +// x[i], y[i] = y[i], x[i] for all i. 
+// +// Swap will panic if the lengths of x and y do not match. +func Swap(x, y Vector) { + if x.N != y.N { + panic(badLength) + } + cblas128.Zswap(x.N, x.Data, x.Inc, y.Data, y.Inc) +} + +// Copy copies the elements of x into the elements of y: +// +// y[i] = x[i] for all i. +// +// Copy will panic if the lengths of x and y do not match. +func Copy(x, y Vector) { + if x.N != y.N { + panic(badLength) + } + cblas128.Zcopy(x.N, x.Data, x.Inc, y.Data, y.Inc) +} + +// Axpy computes +// +// y = alpha * x + y, +// +// where x and y are vectors, and alpha is a scalar. +// Axpy will panic if the lengths of x and y do not match. +func Axpy(alpha complex128, x, y Vector) { + if x.N != y.N { + panic(badLength) + } + cblas128.Zaxpy(x.N, alpha, x.Data, x.Inc, y.Data, y.Inc) +} + +// Scal computes +// +// x = alpha * x, +// +// where x is a vector, and alpha is a scalar. +// +// Scal will panic if the vector increment is negative. +func Scal(alpha complex128, x Vector) { + if x.Inc < 0 { + panic(negInc) + } + cblas128.Zscal(x.N, alpha, x.Data, x.Inc) +} + +// Dscal computes +// +// x = alpha * x, +// +// where x is a vector, and alpha is a real scalar. +// +// Dscal will panic if the vector increment is negative. +func Dscal(alpha float64, x Vector) { + if x.Inc < 0 { + panic(negInc) + } + cblas128.Zdscal(x.N, alpha, x.Data, x.Inc) +} + +// Level 2 + +// Gemv computes +// +// y = alpha * A * x + beta * y if t == blas.NoTrans, +// y = alpha * Aᵀ * x + beta * y if t == blas.Trans, +// y = alpha * Aᴴ * x + beta * y if t == blas.ConjTrans, +// +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are +// scalars. +func Gemv(t blas.Transpose, alpha complex128, a General, x Vector, beta complex128, y Vector) { + cblas128.Zgemv(t, a.Rows, a.Cols, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Gbmv computes +// +// y = alpha * A * x + beta * y if t == blas.NoTrans, +// y = alpha * Aᵀ * x + beta * y if t == blas.Trans, +// y = alpha * Aᴴ * x + beta * y if t == blas.ConjTrans, +// +// where A is an m×n band matrix, x and y are vectors, and alpha and beta are +// scalars. +func Gbmv(t blas.Transpose, alpha complex128, a Band, x Vector, beta complex128, y Vector) { + cblas128.Zgbmv(t, a.Rows, a.Cols, a.KL, a.KU, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Trmv computes +// +// x = A * x if t == blas.NoTrans, +// x = Aᵀ * x if t == blas.Trans, +// x = Aᴴ * x if t == blas.ConjTrans, +// +// where A is an n×n triangular matrix, and x is a vector. +func Trmv(t blas.Transpose, a Triangular, x Vector) { + cblas128.Ztrmv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tbmv computes +// +// x = A * x if t == blas.NoTrans, +// x = Aᵀ * x if t == blas.Trans, +// x = Aᴴ * x if t == blas.ConjTrans, +// +// where A is an n×n triangular band matrix, and x is a vector. +func Tbmv(t blas.Transpose, a TriangularBand, x Vector) { + cblas128.Ztbmv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tpmv computes +// +// x = A * x if t == blas.NoTrans, +// x = Aᵀ * x if t == blas.Trans, +// x = Aᴴ * x if t == blas.ConjTrans, +// +// where A is an n×n triangular matrix in packed format, and x is a vector. +func Tpmv(t blas.Transpose, a TriangularPacked, x Vector) { + cblas128.Ztpmv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc) +} + +// Trsv solves +// +// A * x = b if t == blas.NoTrans, +// Aᵀ * x = b if t == blas.Trans, +// Aᴴ * x = b if t == blas.ConjTrans, +// +// where A is an n×n triangular matrix and x is a vector. 
+//
+// At entry to the function, x contains the values of b, and the result is
+// stored in-place into x.
+//
+// No test for singularity or near-singularity is included in this
+// routine. Such tests must be performed before calling this routine.
+func Trsv(t blas.Transpose, a Triangular, x Vector) {
+ cblas128.Ztrsv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc)
+}
+
+// Tbsv solves
+//
+// A * x = b if t == blas.NoTrans,
+// Aᵀ * x = b if t == blas.Trans,
+// Aᴴ * x = b if t == blas.ConjTrans,
+//
+// where A is an n×n triangular band matrix, and x is a vector.
+//
+// At entry to the function, x contains the values of b, and the result is
+// stored in-place into x.
+//
+// No test for singularity or near-singularity is included in this
+// routine. Such tests must be performed before calling this routine.
+func Tbsv(t blas.Transpose, a TriangularBand, x Vector) {
+ cblas128.Ztbsv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc)
+}
+
+// Tpsv solves
+//
+// A * x = b if t == blas.NoTrans,
+// Aᵀ * x = b if t == blas.Trans,
+// Aᴴ * x = b if t == blas.ConjTrans,
+//
+// where A is an n×n triangular matrix in packed format and x is a vector.
+//
+// At entry to the function, x contains the values of b, and the result is
+// stored in-place into x.
+//
+// No test for singularity or near-singularity is included in this
+// routine. Such tests must be performed before calling this routine.
+func Tpsv(t blas.Transpose, a TriangularPacked, x Vector) {
+ cblas128.Ztpsv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc)
+}
+
+// Hemv computes
+//
+// y = alpha * A * x + beta * y,
+//
+// where A is an n×n Hermitian matrix, x and y are vectors, and alpha and
+// beta are scalars.
+func Hemv(alpha complex128, a Hermitian, x Vector, beta complex128, y Vector) {
+ cblas128.Zhemv(a.Uplo, a.N, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
+}
+
+// Hbmv performs
+//
+// y = alpha * A * x + beta * y,
+//
+// where A is an n×n Hermitian band matrix, x and y are vectors, and alpha
+// and beta are scalars.
+func Hbmv(alpha complex128, a HermitianBand, x Vector, beta complex128, y Vector) {
+ cblas128.Zhbmv(a.Uplo, a.N, a.K, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
+}
+
+// Hpmv performs
+//
+// y = alpha * A * x + beta * y,
+//
+// where A is an n×n Hermitian matrix in packed format, x and y are vectors,
+// and alpha and beta are scalars.
+func Hpmv(alpha complex128, a HermitianPacked, x Vector, beta complex128, y Vector) {
+ cblas128.Zhpmv(a.Uplo, a.N, alpha, a.Data, x.Data, x.Inc, beta, y.Data, y.Inc)
+}
+
+// Geru performs a rank-1 update
+//
+// A += alpha * x * yᵀ,
+//
+// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
+func Geru(alpha complex128, x, y Vector, a General) {
+ cblas128.Zgeru(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
+}
+
+// Gerc performs a rank-1 update
+//
+// A += alpha * x * yᴴ,
+//
+// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
+func Gerc(alpha complex128, x, y Vector, a General) {
+ cblas128.Zgerc(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
+}
+
+// Her performs a rank-1 update
+//
+// A += alpha * x * xᴴ,
+//
+// where A is an n×n Hermitian matrix, x is a vector, and alpha is a scalar.
+func Her(alpha float64, x Vector, a Hermitian) { + cblas128.Zher(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data, a.Stride) +} + +// Hpr performs a rank-1 update +// +// A += alpha * x * xᴴ, +// +// where A is an n×n Hermitian matrix in packed format, x is a vector, and +// alpha is a scalar. +func Hpr(alpha float64, x Vector, a HermitianPacked) { + cblas128.Zhpr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data) +} + +// Her2 performs a rank-2 update +// +// A += alpha * x * yᴴ + conj(alpha) * y * xᴴ, +// +// where A is an n×n Hermitian matrix, x and y are vectors, and alpha is a scalar. +func Her2(alpha complex128, x, y Vector, a Hermitian) { + cblas128.Zher2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride) +} + +// Hpr2 performs a rank-2 update +// +// A += alpha * x * yᴴ + conj(alpha) * y * xᴴ, +// +// where A is an n×n Hermitian matrix in packed format, x and y are vectors, +// and alpha is a scalar. +func Hpr2(alpha complex128, x, y Vector, a HermitianPacked) { + cblas128.Zhpr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data) +} + +// Level 3 + +// Gemm computes +// +// C = alpha * A * B + beta * C, +// +// where A, B, and C are dense matrices, and alpha and beta are scalars. +// tA and tB specify whether A or B are transposed or conjugated. +func Gemm(tA, tB blas.Transpose, alpha complex128, a, b General, beta complex128, c General) { + var m, n, k int + if tA == blas.NoTrans { + m, k = a.Rows, a.Cols + } else { + m, k = a.Cols, a.Rows + } + if tB == blas.NoTrans { + n = b.Cols + } else { + n = b.Rows + } + cblas128.Zgemm(tA, tB, m, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Symm performs +// +// C = alpha * A * B + beta * C if s == blas.Left, +// C = alpha * B * A + beta * C if s == blas.Right, +// +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and +// alpha and beta are scalars. +func Symm(s blas.Side, alpha complex128, a Symmetric, b General, beta complex128, c General) { + var m, n int + if s == blas.Left { + m, n = a.N, b.Cols + } else { + m, n = b.Rows, a.N + } + cblas128.Zsymm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Syrk performs a symmetric rank-k update +// +// C = alpha * A * Aᵀ + beta * C if t == blas.NoTrans, +// C = alpha * Aᵀ * A + beta * C if t == blas.Trans, +// +// where C is an n×n symmetric matrix, A is an n×k matrix if t == blas.NoTrans +// and a k×n matrix otherwise, and alpha and beta are scalars. +func Syrk(t blas.Transpose, alpha complex128, a General, beta complex128, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas128.Zsyrk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride) +} + +// Syr2k performs a symmetric rank-2k update +// +// C = alpha * A * Bᵀ + alpha * B * Aᵀ + beta * C if t == blas.NoTrans, +// C = alpha * Aᵀ * B + alpha * Bᵀ * A + beta * C if t == blas.Trans, +// +// where C is an n×n symmetric matrix, A and B are n×k matrices if +// t == blas.NoTrans and k×n otherwise, and alpha and beta are scalars. 
+func Syr2k(t blas.Transpose, alpha complex128, a, b General, beta complex128, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas128.Zsyr2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Trmm performs +// +// B = alpha * A * B if tA == blas.NoTrans and s == blas.Left, +// B = alpha * Aᵀ * B if tA == blas.Trans and s == blas.Left, +// B = alpha * Aᴴ * B if tA == blas.ConjTrans and s == blas.Left, +// B = alpha * B * A if tA == blas.NoTrans and s == blas.Right, +// B = alpha * B * Aᵀ if tA == blas.Trans and s == blas.Right, +// B = alpha * B * Aᴴ if tA == blas.ConjTrans and s == blas.Right, +// +// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is +// a scalar. +func Trmm(s blas.Side, tA blas.Transpose, alpha complex128, a Triangular, b General) { + cblas128.Ztrmm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} + +// Trsm solves +// +// A * X = alpha * B if tA == blas.NoTrans and s == blas.Left, +// Aᵀ * X = alpha * B if tA == blas.Trans and s == blas.Left, +// Aᴴ * X = alpha * B if tA == blas.ConjTrans and s == blas.Left, +// X * A = alpha * B if tA == blas.NoTrans and s == blas.Right, +// X * Aᵀ = alpha * B if tA == blas.Trans and s == blas.Right, +// X * Aᴴ = alpha * B if tA == blas.ConjTrans and s == blas.Right, +// +// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and +// alpha is a scalar. +// +// At entry to the function, b contains the values of B, and the result is +// stored in-place into b. +// +// No check is made that A is invertible. +func Trsm(s blas.Side, tA blas.Transpose, alpha complex128, a Triangular, b General) { + cblas128.Ztrsm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} + +// Hemm performs +// +// C = alpha * A * B + beta * C if s == blas.Left, +// C = alpha * B * A + beta * C if s == blas.Right, +// +// where A is an n×n or m×m Hermitian matrix, B and C are m×n matrices, and +// alpha and beta are scalars. +func Hemm(s blas.Side, alpha complex128, a Hermitian, b General, beta complex128, c General) { + var m, n int + if s == blas.Left { + m, n = a.N, b.Cols + } else { + m, n = b.Rows, a.N + } + cblas128.Zhemm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Herk performs the Hermitian rank-k update +// +// C = alpha * A * Aᴴ + beta*C if t == blas.NoTrans, +// C = alpha * Aᴴ * A + beta*C if t == blas.ConjTrans, +// +// where C is an n×n Hermitian matrix, A is an n×k matrix if t == blas.NoTrans +// and a k×n matrix otherwise, and alpha and beta are scalars. +func Herk(t blas.Transpose, alpha float64, a General, beta float64, c Hermitian) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas128.Zherk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride) +} + +// Her2k performs the Hermitian rank-2k update +// +// C = alpha * A * Bᴴ + conj(alpha) * B * Aᴴ + beta * C if t == blas.NoTrans, +// C = alpha * Aᴴ * B + conj(alpha) * Bᴴ * A + beta * C if t == blas.ConjTrans, +// +// where C is an n×n Hermitian matrix, A and B are n×k matrices if t == NoTrans +// and k×n matrices otherwise, and alpha and beta are scalars. 
+func Her2k(t blas.Transpose, alpha complex128, a, b General, beta float64, c Hermitian) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas128.Zher2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} diff --git a/vendor/gonum.org/v1/gonum/blas/cblas128/conv.go b/vendor/gonum.org/v1/gonum/blas/cblas128/conv.go new file mode 100644 index 0000000000..bfafb96efc --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/cblas128/conv.go @@ -0,0 +1,265 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT. + +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cblas128 + +import "gonum.org/v1/gonum/blas" + +// GeneralCols represents a matrix using the conventional column-major storage scheme. +type GeneralCols General + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions as a and have adequate backing +// data storage. +func (t GeneralCols) From(a General) { + if t.Rows != a.Rows || t.Cols != a.Cols { + panic("cblas128: mismatched dimension") + } + if len(t.Data) < (t.Cols-1)*t.Stride+t.Rows { + panic("cblas128: short data slice") + } + for i := 0; i < a.Rows; i++ { + for j, v := range a.Data[i*a.Stride : i*a.Stride+a.Cols] { + t.Data[i+j*t.Stride] = v + } + } +} + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions as a and have adequate backing +// data storage. +func (t General) From(a GeneralCols) { + if t.Rows != a.Rows || t.Cols != a.Cols { + panic("cblas128: mismatched dimension") + } + if len(t.Data) < (t.Rows-1)*t.Stride+t.Cols { + panic("cblas128: short data slice") + } + for j := 0; j < a.Cols; j++ { + for i, v := range a.Data[j*a.Stride : j*a.Stride+a.Rows] { + t.Data[i*t.Stride+j] = v + } + } +} + +// TriangularCols represents a matrix using the conventional column-major storage scheme. +type TriangularCols Triangular + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions, uplo and diag as a and have +// adequate backing data storage. +func (t TriangularCols) From(a Triangular) { + if t.N != a.N { + panic("cblas128: mismatched dimension") + } + if t.Uplo != a.Uplo { + panic("cblas128: mismatched BLAS uplo") + } + if t.Diag != a.Diag { + panic("cblas128: mismatched BLAS diag") + } + switch a.Uplo { + default: + panic("cblas128: bad BLAS uplo") + case blas.Upper: + for i := 0; i < a.N; i++ { + for j := i; j < a.N; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + case blas.Lower: + for i := 0; i < a.N; i++ { + for j := 0; j <= i; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + case blas.All: + for i := 0; i < a.N; i++ { + for j := 0; j < a.N; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + } +} + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions, uplo and diag as a and have +// adequate backing data storage. 
+func (t Triangular) From(a TriangularCols) {
+ if t.N != a.N {
+ panic("cblas128: mismatched dimension")
+ }
+ if t.Uplo != a.Uplo {
+ panic("cblas128: mismatched BLAS uplo")
+ }
+ if t.Diag != a.Diag {
+ panic("cblas128: mismatched BLAS diag")
+ }
+ switch a.Uplo {
+ default:
+ panic("cblas128: bad BLAS uplo")
+ case blas.Upper:
+ for i := 0; i < a.N; i++ {
+ for j := i; j < a.N; j++ {
+ t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
+ }
+ }
+ case blas.Lower:
+ for i := 0; i < a.N; i++ {
+ for j := 0; j <= i; j++ {
+ t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
+ }
+ }
+ case blas.All:
+ for i := 0; i < a.N; i++ {
+ for j := 0; j < a.N; j++ {
+ t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
+ }
+ }
+ }
+}
+
+// BandCols represents a matrix using the band column-major storage scheme.
+type BandCols Band
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions and bandwidth as a and have
+// adequate backing data storage.
+func (t BandCols) From(a Band) {
+ if t.Rows != a.Rows || t.Cols != a.Cols {
+ panic("cblas128: mismatched dimension")
+ }
+ if t.KL != a.KL || t.KU != a.KU {
+ panic("cblas128: mismatched bandwidth")
+ }
+ if a.Stride < a.KL+a.KU+1 {
+ panic("cblas128: short stride for source")
+ }
+ if t.Stride < t.KL+t.KU+1 {
+ panic("cblas128: short stride for destination")
+ }
+ for i := 0; i < a.Rows; i++ {
+ for j := max(0, i-a.KL); j < min(i+a.KU+1, a.Cols); j++ {
+ t.Data[i+t.KU-j+j*t.Stride] = a.Data[j+a.KL-i+i*a.Stride]
+ }
+ }
+}
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions and bandwidth as a and have
+// adequate backing data storage.
+func (t Band) From(a BandCols) {
+ if t.Rows != a.Rows || t.Cols != a.Cols {
+ panic("cblas128: mismatched dimension")
+ }
+ if t.KL != a.KL || t.KU != a.KU {
+ panic("cblas128: mismatched bandwidth")
+ }
+ if a.Stride < a.KL+a.KU+1 {
+ panic("cblas128: short stride for source")
+ }
+ if t.Stride < t.KL+t.KU+1 {
+ panic("cblas128: short stride for destination")
+ }
+ for j := 0; j < a.Cols; j++ {
+ for i := max(0, j-a.KU); i < min(j+a.KL+1, a.Rows); i++ {
+ t.Data[j+t.KL-i+i*t.Stride] = a.Data[i+a.KU-j+j*a.Stride]
+ }
+ }
+}
+
+// TriangularBandCols represents a triangular matrix using the band column-major storage scheme.
+type TriangularBandCols TriangularBand
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions, bandwidth and uplo as a and
+// have adequate backing data storage.
+func (t TriangularBandCols) From(a TriangularBand) {
+ if t.N != a.N {
+ panic("cblas128: mismatched dimension")
+ }
+ if t.K != a.K {
+ panic("cblas128: mismatched bandwidth")
+ }
+ if a.Stride < a.K+1 {
+ panic("cblas128: short stride for source")
+ }
+ if t.Stride < t.K+1 {
+ panic("cblas128: short stride for destination")
+ }
+ if t.Uplo != a.Uplo {
+ panic("cblas128: mismatched BLAS uplo")
+ }
+ if t.Diag != a.Diag {
+ panic("cblas128: mismatched BLAS diag")
+ }
+ dst := BandCols{
+ Rows: t.N, Cols: t.N,
+ Stride: t.Stride,
+ Data: t.Data,
+ }
+ src := Band{
+ Rows: a.N, Cols: a.N,
+ Stride: a.Stride,
+ Data: a.Data,
+ }
+ switch a.Uplo {
+ default:
+ panic("cblas128: bad BLAS uplo")
+ case blas.Upper:
+ dst.KU = t.K
+ src.KU = a.K
+ case blas.Lower:
+ dst.KL = t.K
+ src.KL = a.K
+ }
+ dst.From(src)
+}
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions, bandwidth and uplo as a and
+// have adequate backing data storage.
+func (t TriangularBand) From(a TriangularBandCols) { + if t.N != a.N { + panic("cblas128: mismatched dimension") + } + if t.K != a.K { + panic("cblas128: mismatched bandwidth") + } + if a.Stride < a.K+1 { + panic("cblas128: short stride for source") + } + if t.Stride < t.K+1 { + panic("cblas128: short stride for destination") + } + if t.Uplo != a.Uplo { + panic("cblas128: mismatched BLAS uplo") + } + if t.Diag != a.Diag { + panic("cblas128: mismatched BLAS diag") + } + dst := Band{ + Rows: t.N, Cols: t.N, + Stride: t.Stride, + Data: t.Data, + } + src := BandCols{ + Rows: a.N, Cols: a.N, + Stride: a.Stride, + Data: a.Data, + } + switch a.Uplo { + default: + panic("cblas128: bad BLAS uplo") + case blas.Upper: + dst.KU = t.K + src.KU = a.K + case blas.Lower: + dst.KL = t.K + src.KL = a.K + } + dst.From(src) +} diff --git a/vendor/gonum.org/v1/gonum/blas/cblas128/conv_hermitian.go b/vendor/gonum.org/v1/gonum/blas/cblas128/conv_hermitian.go new file mode 100644 index 0000000000..51c3a5777b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/cblas128/conv_hermitian.go @@ -0,0 +1,155 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT. + +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cblas128 + +import "gonum.org/v1/gonum/blas" + +// HermitianCols represents a matrix using the conventional column-major storage scheme. +type HermitianCols Hermitian + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions and uplo as a and have adequate +// backing data storage. +func (t HermitianCols) From(a Hermitian) { + if t.N != a.N { + panic("cblas128: mismatched dimension") + } + if t.Uplo != a.Uplo { + panic("cblas128: mismatched BLAS uplo") + } + switch a.Uplo { + default: + panic("cblas128: bad BLAS uplo") + case blas.Upper: + for i := 0; i < a.N; i++ { + for j := i; j < a.N; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + case blas.Lower: + for i := 0; i < a.N; i++ { + for j := 0; j <= i; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + } +} + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions and uplo as a and have adequate +// backing data storage. +func (t Hermitian) From(a HermitianCols) { + if t.N != a.N { + panic("cblas128: mismatched dimension") + } + if t.Uplo != a.Uplo { + panic("cblas128: mismatched BLAS uplo") + } + switch a.Uplo { + default: + panic("cblas128: bad BLAS uplo") + case blas.Upper: + for i := 0; i < a.N; i++ { + for j := i; j < a.N; j++ { + t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride] + } + } + case blas.Lower: + for i := 0; i < a.N; i++ { + for j := 0; j <= i; j++ { + t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride] + } + } + } +} + +// HermitianBandCols represents an Hermitian matrix using the band column-major storage scheme. +type HermitianBandCols HermitianBand + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions, bandwidth and uplo as a and +// have adequate backing data storage. 
+func (t HermitianBandCols) From(a HermitianBand) { + if t.N != a.N { + panic("cblas128: mismatched dimension") + } + if t.K != a.K { + panic("cblas128: mismatched bandwidth") + } + if a.Stride < a.K+1 { + panic("cblas128: short stride for source") + } + if t.Stride < t.K+1 { + panic("cblas128: short stride for destination") + } + if t.Uplo != a.Uplo { + panic("cblas128: mismatched BLAS uplo") + } + dst := BandCols{ + Rows: t.N, Cols: t.N, + Stride: t.Stride, + Data: t.Data, + } + src := Band{ + Rows: a.N, Cols: a.N, + Stride: a.Stride, + Data: a.Data, + } + switch a.Uplo { + default: + panic("cblas128: bad BLAS uplo") + case blas.Upper: + dst.KU = t.K + src.KU = a.K + case blas.Lower: + dst.KL = t.K + src.KL = a.K + } + dst.From(src) +} + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions, bandwidth and uplo as a and +// have adequate backing data storage. +func (t HermitianBand) From(a HermitianBandCols) { + if t.N != a.N { + panic("cblas128: mismatched dimension") + } + if t.K != a.K { + panic("cblas128: mismatched bandwidth") + } + if a.Stride < a.K+1 { + panic("cblas128: short stride for source") + } + if t.Stride < t.K+1 { + panic("cblas128: short stride for destination") + } + if t.Uplo != a.Uplo { + panic("cblas128: mismatched BLAS uplo") + } + dst := Band{ + Rows: t.N, Cols: t.N, + Stride: t.Stride, + Data: t.Data, + } + src := BandCols{ + Rows: a.N, Cols: a.N, + Stride: a.Stride, + Data: a.Data, + } + switch a.Uplo { + default: + panic("cblas128: bad BLAS uplo") + case blas.Upper: + dst.KU = t.K + src.KU = a.K + case blas.Lower: + dst.KL = t.K + src.KL = a.K + } + dst.From(src) +} diff --git a/vendor/gonum.org/v1/gonum/blas/cblas128/conv_symmetric.go b/vendor/gonum.org/v1/gonum/blas/cblas128/conv_symmetric.go new file mode 100644 index 0000000000..f1bf40c208 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/cblas128/conv_symmetric.go @@ -0,0 +1,155 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT. + +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cblas128 + +import "gonum.org/v1/gonum/blas" + +// SymmetricCols represents a matrix using the conventional column-major storage scheme. +type SymmetricCols Symmetric + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions and uplo as a and have adequate +// backing data storage. +func (t SymmetricCols) From(a Symmetric) { + if t.N != a.N { + panic("cblas128: mismatched dimension") + } + if t.Uplo != a.Uplo { + panic("cblas128: mismatched BLAS uplo") + } + switch a.Uplo { + default: + panic("cblas128: bad BLAS uplo") + case blas.Upper: + for i := 0; i < a.N; i++ { + for j := i; j < a.N; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + case blas.Lower: + for i := 0; i < a.N; i++ { + for j := 0; j <= i; j++ { + t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j] + } + } + } +} + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions and uplo as a and have adequate +// backing data storage. 
+func (t Symmetric) From(a SymmetricCols) { + if t.N != a.N { + panic("cblas128: mismatched dimension") + } + if t.Uplo != a.Uplo { + panic("cblas128: mismatched BLAS uplo") + } + switch a.Uplo { + default: + panic("cblas128: bad BLAS uplo") + case blas.Upper: + for i := 0; i < a.N; i++ { + for j := i; j < a.N; j++ { + t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride] + } + } + case blas.Lower: + for i := 0; i < a.N; i++ { + for j := 0; j <= i; j++ { + t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride] + } + } + } +} + +// SymmetricBandCols represents a symmetric matrix using the band column-major storage scheme. +type SymmetricBandCols SymmetricBand + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions, bandwidth and uplo as a and +// have adequate backing data storage. +func (t SymmetricBandCols) From(a SymmetricBand) { + if t.N != a.N { + panic("cblas128: mismatched dimension") + } + if t.K != a.K { + panic("cblas128: mismatched bandwidth") + } + if a.Stride < a.K+1 { + panic("cblas128: short stride for source") + } + if t.Stride < t.K+1 { + panic("cblas128: short stride for destination") + } + if t.Uplo != a.Uplo { + panic("cblas128: mismatched BLAS uplo") + } + dst := BandCols{ + Rows: t.N, Cols: t.N, + Stride: t.Stride, + Data: t.Data, + } + src := Band{ + Rows: a.N, Cols: a.N, + Stride: a.Stride, + Data: a.Data, + } + switch a.Uplo { + default: + panic("cblas128: bad BLAS uplo") + case blas.Upper: + dst.KU = t.K + src.KU = a.K + case blas.Lower: + dst.KL = t.K + src.KL = a.K + } + dst.From(src) +} + +// From fills the receiver with elements from a. The receiver +// must have the same dimensions, bandwidth and uplo as a and +// have adequate backing data storage. +func (t SymmetricBand) From(a SymmetricBandCols) { + if t.N != a.N { + panic("cblas128: mismatched dimension") + } + if t.K != a.K { + panic("cblas128: mismatched bandwidth") + } + if a.Stride < a.K+1 { + panic("cblas128: short stride for source") + } + if t.Stride < t.K+1 { + panic("cblas128: short stride for destination") + } + if t.Uplo != a.Uplo { + panic("cblas128: mismatched BLAS uplo") + } + dst := Band{ + Rows: t.N, Cols: t.N, + Stride: t.Stride, + Data: t.Data, + } + src := BandCols{ + Rows: a.N, Cols: a.N, + Stride: a.Stride, + Data: a.Data, + } + switch a.Uplo { + default: + panic("cblas128: bad BLAS uplo") + case blas.Upper: + dst.KU = t.K + src.KU = a.K + case blas.Lower: + dst.KL = t.K + src.KL = a.K + } + dst.From(src) +} diff --git a/vendor/gonum.org/v1/gonum/blas/cblas128/doc.go b/vendor/gonum.org/v1/gonum/blas/cblas128/doc.go new file mode 100644 index 0000000000..09719b19e6 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/cblas128/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cblas128 provides a simple interface to the complex128 BLAS API. +package cblas128 // import "gonum.org/v1/gonum/blas/cblas128" diff --git a/vendor/gonum.org/v1/gonum/blas/conversions.bash b/vendor/gonum.org/v1/gonum/blas/conversions.bash new file mode 100644 index 0000000000..d1c0ef0d99 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/conversions.bash @@ -0,0 +1,159 @@ +#!/usr/bin/env bash + +# Copyright ©2017 The Gonum Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# Generate code for blas32. 
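+# Each sibling package is derived mechanically from the float64 sources:
+# gofmt -r applies a typed rewrite rule such as 'float64 -> float32' to
+# every expression, and sed renames the package and, where the tests
+# need it, swaps the math import for a type-appropriate replacement.
+# The Hermitian files are further derived from the symmetric sources by
+# renaming Symmetric/Sym to Hermitian/Herm.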
+echo Generating blas32/conv.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > blas32/conv.go +cat blas64/conv.go \ +| gofmt -r 'float64 -> float32' \ +\ +| sed -e 's/blas64/blas32/' \ +\ +>> blas32/conv.go + +echo Generating blas32/conv_test.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > blas32/conv_test.go +cat blas64/conv_test.go \ +| gofmt -r 'float64 -> float32' \ +\ +| sed -e 's/blas64/blas32/' \ + -e 's_"math"_math "gonum.org/v1/gonum/internal/math32"_' \ +\ +>> blas32/conv_test.go + +echo Generating blas32/conv_symmetric.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > blas32/conv_symmetric.go +cat blas64/conv_symmetric.go \ +| gofmt -r 'float64 -> float32' \ +\ +| sed -e 's/blas64/blas32/' \ +\ +>> blas32/conv_symmetric.go + +echo Generating blas32/conv_symmetric_test.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > blas32/conv_symmetric_test.go +cat blas64/conv_symmetric_test.go \ +| gofmt -r 'float64 -> float32' \ +\ +| sed -e 's/blas64/blas32/' \ + -e 's_"math"_math "gonum.org/v1/gonum/internal/math32"_' \ +\ +>> blas32/conv_symmetric_test.go + + +# Generate code for cblas128. +echo Generating cblas128/conv.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv.go +cat blas64/conv.go \ +| gofmt -r 'float64 -> complex128' \ +\ +| sed -e 's/blas64/cblas128/' \ +\ +>> cblas128/conv.go + +echo Generating cblas128/conv_test.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv_test.go +cat blas64/conv_test.go \ +| gofmt -r 'float64 -> complex128' \ +\ +| sed -e 's/blas64/cblas128/' \ + -e 's_"math"_math "math/cmplx"_' \ +\ +>> cblas128/conv_test.go + +echo Generating cblas128/conv_symmetric.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv_symmetric.go +cat blas64/conv_symmetric.go \ +| gofmt -r 'float64 -> complex128' \ +\ +| sed -e 's/blas64/cblas128/' \ +\ +>> cblas128/conv_symmetric.go + +echo Generating cblas128/conv_symmetric_test.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv_symmetric_test.go +cat blas64/conv_symmetric_test.go \ +| gofmt -r 'float64 -> complex128' \ +\ +| sed -e 's/blas64/cblas128/' \ + -e 's_"math"_math "math/cmplx"_' \ +\ +>> cblas128/conv_symmetric_test.go + +echo Generating cblas128/conv_hermitian.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv_hermitian.go +cat blas64/conv_symmetric.go \ +| gofmt -r 'float64 -> complex128' \ +\ +| sed -e 's/blas64/cblas128/' \ + -e 's/Symmetric/Hermitian/g' \ + -e 's/a symmetric/an Hermitian/g' \ + -e 's/symmetric/hermitian/g' \ + -e 's/Sym/Herm/g' \ +\ +>> cblas128/conv_hermitian.go + +echo Generating cblas128/conv_hermitian_test.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv_hermitian_test.go +cat blas64/conv_symmetric_test.go \ +| gofmt -r 'float64 -> complex128' \ +\ +| sed -e 's/blas64/cblas128/' \ + -e 's/Symmetric/Hermitian/g' \ + -e 's/a symmetric/an Hermitian/g' \ + -e 's/symmetric/hermitian/g' \ + -e 's/Sym/Herm/g' \ + -e 's_"math"_math "math/cmplx"_' \ +\ +>> cblas128/conv_hermitian_test.go + + +# Generate code for cblas64. 
+echo Generating cblas64/conv.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas64/conv.go +cat blas64/conv.go \ +| gofmt -r 'float64 -> complex64' \ +\ +| sed -e 's/blas64/cblas64/' \ +\ +>> cblas64/conv.go + +echo Generating cblas64/conv_test.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas64/conv_test.go +cat blas64/conv_test.go \ +| gofmt -r 'float64 -> complex64' \ +\ +| sed -e 's/blas64/cblas64/' \ + -e 's_"math"_math "gonum.org/v1/gonum/internal/cmplx64"_' \ +\ +>> cblas64/conv_test.go + +echo Generating cblas64/conv_hermitian.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas64/conv_hermitian.go +cat blas64/conv_symmetric.go \ +| gofmt -r 'float64 -> complex64' \ +\ +| sed -e 's/blas64/cblas64/' \ + -e 's/Symmetric/Hermitian/g' \ + -e 's/a symmetric/an Hermitian/g' \ + -e 's/symmetric/hermitian/g' \ + -e 's/Sym/Herm/g' \ +\ +>> cblas64/conv_hermitian.go + +echo Generating cblas64/conv_hermitian_test.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas64/conv_hermitian_test.go +cat blas64/conv_symmetric_test.go \ +| gofmt -r 'float64 -> complex64' \ +\ +| sed -e 's/blas64/cblas64/' \ + -e 's/Symmetric/Hermitian/g' \ + -e 's/a symmetric/an Hermitian/g' \ + -e 's/symmetric/hermitian/g' \ + -e 's/Sym/Herm/g' \ + -e 's_"math"_math "gonum.org/v1/gonum/internal/cmplx64"_' \ +\ +>> cblas64/conv_hermitian_test.go diff --git a/vendor/gonum.org/v1/gonum/blas/doc.go b/vendor/gonum.org/v1/gonum/blas/doc.go new file mode 100644 index 0000000000..ea4b16c904 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/doc.go @@ -0,0 +1,108 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package blas provides interfaces for the BLAS linear algebra standard. + +All methods must perform appropriate parameter checking and panic if +provided parameters that do not conform to the requirements specified +by the BLAS standard. + +Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html + +This version is modified to remove the "order" option. All matrix operations are +on row-order matrices. 
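+
+Each row of the tables below names one routine family: the leading
+underscore stands for the type prefix, so the prefixes "S, D, C, Z" on
+the _axpy row mean that the standard defines Saxpy, Daxpy, Caxpy and
+Zaxpy, all with the argument list shown. Reading that row, Daxpy(n,
+alpha, x, incX, y, incY) scales the strided vector x by alpha and adds
+it element-wise to y.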
+ +Level 1 BLAS + + dim scalar vector vector scalars 5-element prefixes + struct + + _rotg ( a, b ) S, D + _rotmg( d1, d2, a, b ) S, D + _rot ( n, x, incX, y, incY, c, s ) S, D + _rotm ( n, x, incX, y, incY, param ) S, D + _swap ( n, x, incX, y, incY ) S, D, C, Z + _scal ( n, alpha, x, incX ) S, D, C, Z, Cs, Zd + _copy ( n, x, incX, y, incY ) S, D, C, Z + _axpy ( n, alpha, x, incX, y, incY ) S, D, C, Z + _dot ( n, x, incX, y, incY ) S, D, Ds + _dotu ( n, x, incX, y, incY ) C, Z + _dotc ( n, x, incX, y, incY ) C, Z + __dot ( n, alpha, x, incX, y, incY ) Sds + _nrm2 ( n, x, incX ) S, D, Sc, Dz + _asum ( n, x, incX ) S, D, Sc, Dz + I_amax( n, x, incX ) s, d, c, z + +Level 2 BLAS + + options dim b-width scalar matrix vector scalar vector prefixes + + _gemv ( trans, m, n, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z + _gbmv ( trans, m, n, kL, kU, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z + _hemv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) C, Z + _hbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) C, Z + _hpmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) C, Z + _symv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) S, D + _sbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) S, D + _spmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) S, D + _trmv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z + _tbmv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z + _tpmv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z + _trsv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z + _tbsv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z + _tpsv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z + + options dim scalar vector vector matrix prefixes + + _ger ( m, n, alpha, x, incX, y, incY, a, lda ) S, D + _geru ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z + _gerc ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z + _her ( uplo, n, alpha, x, incX, a, lda ) C, Z + _hpr ( uplo, n, alpha, x, incX, ap ) C, Z + _her2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) C, Z + _hpr2 ( uplo, n, alpha, x, incX, y, incY, ap ) C, Z + _syr ( uplo, n, alpha, x, incX, a, lda ) S, D + _spr ( uplo, n, alpha, x, incX, ap ) S, D + _syr2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) S, D + _spr2 ( uplo, n, alpha, x, incX, y, incY, ap ) S, D + +Level 3 BLAS + + options dim scalar matrix matrix scalar matrix prefixes + + _gemm ( transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z + _symm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z + _hemm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z + _syrk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) S, D, C, Z + _herk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) C, Z + _syr2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z + _her2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z + _trmm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z + _trsm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z + +Meaning of prefixes + + S - float32 C - complex64 + D - float64 Z - complex128 + +Matrix types + + GE - GEneral GB - General Band + SY - SYmmetric SB - Symmetric Band SP - Symmetric Packed + HE - HErmitian HB - Hermitian Band HP - Hermitian Packed + TR - TRiangular TB - Triangular Band TP - Triangular Packed + +Options + + trans = NoTrans, Trans, ConjTrans + uplo = Upper, Lower + diag = Nonunit, Unit + side = Left, Right (A or op(A) on the left, or A or op(A) on the right) + +For real matrices, Trans and ConjTrans 
have the same meaning. +For Hermitian matrices, trans = Trans is not allowed. +For complex symmetric matrices, trans = ConjTrans is not allowed. +*/ +package blas // import "gonum.org/v1/gonum/blas" diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/dgemm.go b/vendor/gonum.org/v1/gonum/blas/gonum/dgemm.go new file mode 100644 index 0000000000..9e74cc1dbf --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/dgemm.go @@ -0,0 +1,297 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "runtime" + "sync" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/f64" +) + +// Dgemm performs one of the matrix-matrix operations +// +// C = alpha * A * B + beta * C +// C = alpha * Aᵀ * B + beta * C +// C = alpha * A * Bᵀ + beta * C +// C = alpha * Aᵀ * Bᵀ + beta * C +// +// where A is an m×k or k×m dense matrix, B is an n×k or k×n dense matrix, C is +// an m×n matrix, and alpha and beta are scalars. tA and tB specify whether A or +// B are transposed. +func (Implementation) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { + switch tA { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch tB { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + aTrans := tA == blas.Trans || tA == blas.ConjTrans + if aTrans { + if lda < max(1, m) { + panic(badLdA) + } + } else { + if lda < max(1, k) { + panic(badLdA) + } + } + bTrans := tB == blas.Trans || tB == blas.ConjTrans + if bTrans { + if ldb < max(1, k) { + panic(badLdB) + } + } else { + if ldb < max(1, n) { + panic(badLdB) + } + } + if ldc < max(1, n) { + panic(badLdC) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if aTrans { + if len(a) < (k-1)*lda+m { + panic(shortA) + } + } else { + if len(a) < (m-1)*lda+k { + panic(shortA) + } + } + if bTrans { + if len(b) < (n-1)*ldb+k { + panic(shortB) + } + } else { + if len(b) < (k-1)*ldb+n { + panic(shortB) + } + } + if len(c) < (m-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. + if (alpha == 0 || k == 0) && beta == 1 { + return + } + + // scale c + if beta != 1 { + if beta == 0 { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + } else { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + } + } + + dgemmParallel(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha) +} + +func dgemmParallel(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + // dgemmParallel computes a parallel matrix multiplication by partitioning + // a and b into sub-blocks, and updating c with the multiplication of the sub-block + // In all cases, + // A = [ A_11 A_12 ... A_1j + // A_21 A_22 ... A_2j + // ... + // A_i1 A_i2 ... A_ij] + // + // and same for B. All of the submatrix sizes are blockSize×blockSize except + // at the edges. + // + // In all cases, there is one dimension for each matrix along which + // C must be updated sequentially. 
+ // Cij = \sum_k Aik Bki, (A * B) + // Cij = \sum_k Aki Bkj, (Aᵀ * B) + // Cij = \sum_k Aik Bjk, (A * Bᵀ) + // Cij = \sum_k Aki Bjk, (Aᵀ * Bᵀ) + // + // This code computes one {i, j} block sequentially along the k dimension, + // and computes all of the {i, j} blocks concurrently. This + // partitioning allows Cij to be updated in-place without race-conditions. + // Instead of launching a goroutine for each possible concurrent computation, + // a number of worker goroutines are created and channels are used to pass + // available and completed cases. + // + // http://alexkr.com/docs/matrixmult.pdf is a good reference on matrix-matrix + // multiplies, though this code does not copy matrices to attempt to eliminate + // cache misses. + + maxKLen := k + parBlocks := blocks(m, blockSize) * blocks(n, blockSize) + if parBlocks < minParBlock { + // The matrix multiplication is small in the dimensions where it can be + // computed concurrently. Just do it in serial. + dgemmSerial(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + } + + // workerLimit acts a number of maximum concurrent workers, + // with the limit set to the number of procs available. + workerLimit := make(chan struct{}, runtime.GOMAXPROCS(0)) + + // wg is used to wait for all + var wg sync.WaitGroup + wg.Add(parBlocks) + defer wg.Wait() + + for i := 0; i < m; i += blockSize { + for j := 0; j < n; j += blockSize { + workerLimit <- struct{}{} + go func(i, j int) { + defer func() { + wg.Done() + <-workerLimit + }() + + leni := blockSize + if i+leni > m { + leni = m - i + } + lenj := blockSize + if j+lenj > n { + lenj = n - j + } + + cSub := sliceView64(c, ldc, i, j, leni, lenj) + + // Compute A_ik B_kj for all k + for k := 0; k < maxKLen; k += blockSize { + lenk := blockSize + if k+lenk > maxKLen { + lenk = maxKLen - k + } + var aSub, bSub []float64 + if aTrans { + aSub = sliceView64(a, lda, k, i, lenk, leni) + } else { + aSub = sliceView64(a, lda, i, k, leni, lenk) + } + if bTrans { + bSub = sliceView64(b, ldb, j, k, lenj, lenk) + } else { + bSub = sliceView64(b, ldb, k, j, lenk, lenj) + } + dgemmSerial(aTrans, bTrans, leni, lenj, lenk, aSub, lda, bSub, ldb, cSub, ldc, alpha) + } + }(i, j) + } + } +} + +// dgemmSerial is serial matrix multiply +func dgemmSerial(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + switch { + case !aTrans && !bTrans: + dgemmSerialNotNot(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + case aTrans && !bTrans: + dgemmSerialTransNot(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + case !aTrans && bTrans: + dgemmSerialNotTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + case aTrans && bTrans: + dgemmSerialTransTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + default: + panic("unreachable") + } +} + +// dgemmSerial where neither a nor b are transposed +func dgemmSerialNotNot(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + // This style is used instead of the literal [i*stride +j]) is used because + // approximately 5 times faster as of go 1.3. 
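+ //
+ // Row i of C accumulates alpha*A(i,l) times row l of B for each l, so
+ // the innermost update is one fused scale-and-add (axpy) over a
+ // contiguous row slice, which the assembly kernels vectorize.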
+ for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for l, v := range a[i*lda : i*lda+k] { + tmp := alpha * v + if tmp != 0 { + f64.AxpyUnitary(tmp, b[l*ldb:l*ldb+n], ctmp) + } + } + } +} + +// dgemmSerial where neither a is transposed and b is not +func dgemmSerialTransNot(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + // This style is used instead of the literal [i*stride +j]) is used because + // approximately 5 times faster as of go 1.3. + for l := 0; l < k; l++ { + btmp := b[l*ldb : l*ldb+n] + for i, v := range a[l*lda : l*lda+m] { + tmp := alpha * v + if tmp != 0 { + ctmp := c[i*ldc : i*ldc+n] + f64.AxpyUnitary(tmp, btmp, ctmp) + } + } + } +} + +// dgemmSerial where neither a is not transposed and b is +func dgemmSerialNotTrans(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + // This style is used instead of the literal [i*stride +j]) is used because + // approximately 5 times faster as of go 1.3. + for i := 0; i < m; i++ { + atmp := a[i*lda : i*lda+k] + ctmp := c[i*ldc : i*ldc+n] + for j := 0; j < n; j++ { + ctmp[j] += alpha * f64.DotUnitary(atmp, b[j*ldb:j*ldb+k]) + } + } +} + +// dgemmSerial where both are transposed +func dgemmSerialTransTrans(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + // This style is used instead of the literal [i*stride +j]) is used because + // approximately 5 times faster as of go 1.3. + for l := 0; l < k; l++ { + for i, v := range a[l*lda : l*lda+m] { + tmp := alpha * v + if tmp != 0 { + ctmp := c[i*ldc : i*ldc+n] + f64.AxpyInc(tmp, b[l:], ctmp, uintptr(n), uintptr(ldb), 1, 0, 0) + } + } + } +} + +func sliceView64(a []float64, lda, i, j, r, c int) []float64 { + return a[i*lda+j : (i+r-1)*lda+j+c] +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/doc.go b/vendor/gonum.org/v1/gonum/blas/gonum/doc.go new file mode 100644 index 0000000000..cbca601d90 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/doc.go @@ -0,0 +1,99 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Ensure changes made to blas/native are reflected in blas/cgo where relevant. + +/* +Package gonum is a Go implementation of the BLAS API. This implementation +panics when the input arguments are invalid as per the standard, for example +if a vector increment is zero. Note that the treatment of NaN values +is not specified, and differs among the BLAS implementations. +gonum.org/v1/gonum/blas/blas64 provides helpful wrapper functions to the BLAS +interface. The rest of this text describes the layout of the data for the input types. + +Note that in the function documentation, x[i] refers to the i^th element +of the vector, which will be different from the i^th element of the slice if +incX != 1. + +See http://www.netlib.org/lapack/explore-html/d4/de1/_l_i_c_e_n_s_e_source.html +for more license information. + +Vector arguments are effectively strided slices. They have two input arguments, +a number of elements, n, and an increment, incX. The increment specifies the +distance between elements of the vector. The actual Go slice may be longer +than necessary. +The increment may be positive or negative, except in functions with only +a single vector argument where the increment may only be positive. If the increment +is negative, s[0] is the last element in the slice. 
Note that this is not the same +as counting backward from the end of the slice, as len(s) may be longer than +necessary. So, for example, if n = 5 and incX = 3, the elements of s are + + [0 * * 1 * * 2 * * 3 * * 4 * * * ...] + +where ∗ elements are never accessed. If incX = -3, the same elements are +accessed, just in reverse order (4, 3, 2, 1, 0). + +Dense matrices are specified by a number of rows, a number of columns, and a stride. +The stride specifies the number of entries in the slice between the first element +of successive rows. The stride must be at least as large as the number of columns +but may be longer. + + [a00 ... a0n a0* ... a1stride-1 a21 ... amn am* ... amstride-1] + +Thus, dense[i*ld + j] refers to the {i, j}th element of the matrix. + +Symmetric and triangular matrices (non-packed) are stored identically to Dense, +except that only elements in one triangle of the matrix are accessed. + +Packed symmetric and packed triangular matrices are laid out with the entries +condensed such that all of the unreferenced elements are removed. So, the upper triangular +matrix + + [ + 1 2 3 + 0 4 5 + 0 0 6 + ] + +and the lower-triangular matrix + + [ + 1 0 0 + 2 3 0 + 4 5 6 + ] + +will both be compacted as [1 2 3 4 5 6]. The (i, j) element of the original +dense matrix can be found at element i*n - (i-1)*i/2 + j for upper triangular, +and at element i * (i+1) /2 + j for lower triangular. + +Banded matrices are laid out in a compact format, constructed by removing the +zeros in the rows and aligning the diagonals. For example, the matrix + + [ + 1 2 3 0 0 0 + 4 5 6 7 0 0 + 0 8 9 10 11 0 + 0 0 12 13 14 15 + 0 0 0 16 17 18 + 0 0 0 0 19 20 + ] + +implicitly becomes (∗ entries are never accessed) + + [ + * 1 2 3 + 4 5 6 7 + 8 9 10 11 + 12 13 14 15 + 16 17 18 * + 19 20 * * + ] + +which is given to the BLAS routine as [∗ 1 2 3 4 ...]. + +See http://www.crest.iu.edu/research/mtl/reference/html/banded.html +for more information +*/ +package gonum // import "gonum.org/v1/gonum/blas/gonum" diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/errors.go b/vendor/gonum.org/v1/gonum/blas/gonum/errors.go new file mode 100644 index 0000000000..e98575d0fa --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/errors.go @@ -0,0 +1,35 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +// Panic strings used during parameter checks. +// This list is duplicated in netlib/blas/netlib. Keep in sync. 
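+//
+// Parameter checks throughout the package panic with these strings, so
+// a caller that must not crash can translate the panic into an error
+// with recover; a minimal sketch (helper name hypothetical, assumes fmt
+// is imported):
+//
+//	func safeDnrm2(n int, x []float64, incX int) (v float64, err error) {
+//		defer func() {
+//			if r := recover(); r != nil {
+//				err = fmt.Errorf("blas: %v", r)
+//			}
+//		}()
+//		return Implementation{}.Dnrm2(n, x, incX), nil
+//	}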
+const ( + zeroIncX = "blas: zero x index increment" + zeroIncY = "blas: zero y index increment" + + mLT0 = "blas: m < 0" + nLT0 = "blas: n < 0" + kLT0 = "blas: k < 0" + kLLT0 = "blas: kL < 0" + kULT0 = "blas: kU < 0" + + badUplo = "blas: illegal triangle" + badTranspose = "blas: illegal transpose" + badDiag = "blas: illegal diagonal" + badSide = "blas: illegal side" + badFlag = "blas: illegal rotm flag" + + badLdA = "blas: bad leading dimension of A" + badLdB = "blas: bad leading dimension of B" + badLdC = "blas: bad leading dimension of C" + + shortX = "blas: insufficient length of x" + shortY = "blas: insufficient length of y" + shortAP = "blas: insufficient length of ap" + shortA = "blas: insufficient length of a" + shortB = "blas: insufficient length of b" + shortC = "blas: insufficient length of c" +) diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/gonum.go b/vendor/gonum.org/v1/gonum/blas/gonum/gonum.go new file mode 100644 index 0000000000..5a5c111012 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/gonum.go @@ -0,0 +1,38 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate ./single_precision.bash + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/internal/math32" +) + +type Implementation struct{} + +// [SD]gemm behavior constants. These are kept here to keep them out of the +// way during single precision code generation. +const ( + blockSize = 64 // b x b matrix + minParBlock = 4 // minimum number of blocks needed to go parallel +) + +// blocks returns the number of divisions of the dimension length with the given +// block size. +func blocks(dim, bsize int) int { + return (dim + bsize - 1) / bsize +} + +// dcabs1 returns |real(z)|+|imag(z)|. +func dcabs1(z complex128) float64 { + return math.Abs(real(z)) + math.Abs(imag(z)) +} + +// scabs1 returns |real(z)|+|imag(z)|. +func scabs1(z complex64) float32 { + return math32.Abs(real(z)) + math32.Abs(imag(z)) +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx128.go b/vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx128.go new file mode 100644 index 0000000000..3e3af0db13 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx128.go @@ -0,0 +1,454 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/c128" +) + +var _ blas.Complex128Level1 = Implementation{} + +// Dzasum returns the sum of the absolute values of the elements of x +// +// \sum_i |Re(x[i])| + |Im(x[i])| +// +// Dzasum returns 0 if incX is negative. +func (Implementation) Dzasum(n int, x []complex128, incX int) float64 { + if n < 0 { + panic(nLT0) + } + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return 0 + } + var sum float64 + if incX == 1 { + if len(x) < n { + panic(shortX) + } + for _, v := range x[:n] { + sum += dcabs1(v) + } + return sum + } + if (n-1)*incX >= len(x) { + panic(shortX) + } + for i := 0; i < n; i++ { + v := x[i*incX] + sum += dcabs1(v) + } + return sum +} + +// Dznrm2 computes the Euclidean norm of the complex vector x, +// +// ‖x‖_2 = sqrt(\sum_i x[i] * conj(x[i])). +// +// This function returns 0 if incX is negative. 
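+//
+// To guard against overflow the sum of squares is kept in scaled form:
+// scale tracks the largest component magnitude seen so far, and ssq
+// maintains the invariant
+//
+//	scale^2 * ssq = \sum_i Re(x[i])^2 + Im(x[i])^2
+//
+// so no intermediate square can overflow or underflow while the final
+// norm is representable.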
+func (Implementation) Dznrm2(n int, x []complex128, incX int) float64 { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return 0 + } + if n < 1 { + if n == 0 { + return 0 + } + panic(nLT0) + } + if (n-1)*incX >= len(x) { + panic(shortX) + } + var ( + scale float64 + ssq float64 = 1 + ) + if incX == 1 { + for _, v := range x[:n] { + re, im := math.Abs(real(v)), math.Abs(imag(v)) + if re != 0 { + if re > scale { + ssq = 1 + ssq*(scale/re)*(scale/re) + scale = re + } else { + ssq += (re / scale) * (re / scale) + } + } + if im != 0 { + if im > scale { + ssq = 1 + ssq*(scale/im)*(scale/im) + scale = im + } else { + ssq += (im / scale) * (im / scale) + } + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(ssq) + } + for ix := 0; ix < n*incX; ix += incX { + re, im := math.Abs(real(x[ix])), math.Abs(imag(x[ix])) + if re != 0 { + if re > scale { + ssq = 1 + ssq*(scale/re)*(scale/re) + scale = re + } else { + ssq += (re / scale) * (re / scale) + } + } + if im != 0 { + if im > scale { + ssq = 1 + ssq*(scale/im)*(scale/im) + scale = im + } else { + ssq += (im / scale) * (im / scale) + } + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(ssq) +} + +// Izamax returns the index of the first element of x having largest |Re(·)|+|Im(·)|. +// Izamax returns -1 if n is 0 or incX is negative. +func (Implementation) Izamax(n int, x []complex128, incX int) int { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + // Return invalid index. + return -1 + } + if n < 1 { + if n == 0 { + // Return invalid index. + return -1 + } + panic(nLT0) + } + if len(x) <= (n-1)*incX { + panic(shortX) + } + idx := 0 + max := dcabs1(x[0]) + if incX == 1 { + for i, v := range x[1:n] { + absV := dcabs1(v) + if absV > max { + max = absV + idx = i + 1 + } + } + return idx + } + ix := incX + for i := 1; i < n; i++ { + absV := dcabs1(x[ix]) + if absV > max { + max = absV + idx = i + } + ix += incX + } + return idx +} + +// Zaxpy adds alpha times x to y: +// +// y[i] += alpha * x[i] for all i +func (Implementation) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(shortX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(shortY) + } + if alpha == 0 { + return + } + if incX == 1 && incY == 1 { + c128.AxpyUnitary(alpha, x[:n], y[:n]) + return + } + var ix, iy int + if incX < 0 { + ix = (1 - n) * incX + } + if incY < 0 { + iy = (1 - n) * incY + } + c128.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} + +// Zcopy copies the vector x to vector y. 
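+//
+// A negative increment walks the corresponding vector backwards, so,
+// for example (values arbitrary),
+//
+//	x := []complex128{1, 2, 3}
+//	y := make([]complex128, 3)
+//	Implementation{}.Zcopy(3, x, 1, y, -1)
+//	// y is now [3 2 1]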
+func (Implementation) Zcopy(n int, x []complex128, incX int, y []complex128, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(shortX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(shortY) + } + if incX == 1 && incY == 1 { + copy(y[:n], x[:n]) + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + y[iy] = x[ix] + ix += incX + iy += incY + } +} + +// Zdotc computes the dot product +// +// xᴴ · y +// +// of two complex vectors x and y. +func (Implementation) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) complex128 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(nLT0) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(shortX) + } + if len(y) < n { + panic(shortY) + } + return c128.DotcUnitary(x[:n], y[:n]) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || (n-1)*incX >= len(x) { + panic(shortX) + } + if iy >= len(y) || (n-1)*incY >= len(y) { + panic(shortY) + } + return c128.DotcInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} + +// Zdotu computes the dot product +// +// xᵀ · y +// +// of two complex vectors x and y. +func (Implementation) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) complex128 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(nLT0) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(shortX) + } + if len(y) < n { + panic(shortY) + } + return c128.DotuUnitary(x[:n], y[:n]) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || (n-1)*incX >= len(x) { + panic(shortX) + } + if iy >= len(y) || (n-1)*incY >= len(y) { + panic(shortY) + } + return c128.DotuInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} + +// Zdscal scales the vector x by a real scalar alpha. +// Zdscal has no effect if incX < 0. +func (Implementation) Zdscal(n int, alpha float64, x []complex128, incX int) { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return + } + if (n-1)*incX >= len(x) { + panic(shortX) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if alpha == 0 { + if incX == 1 { + x = x[:n] + for i := range x { + x[i] = 0 + } + return + } + for ix := 0; ix < n*incX; ix += incX { + x[ix] = 0 + } + return + } + if incX == 1 { + x = x[:n] + for i, v := range x { + x[i] = complex(alpha*real(v), alpha*imag(v)) + } + return + } + for ix := 0; ix < n*incX; ix += incX { + v := x[ix] + x[ix] = complex(alpha*real(v), alpha*imag(v)) + } +} + +// Zscal scales the vector x by a complex scalar alpha. +// Zscal has no effect if incX < 0. 
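+//
+// For example (values arbitrary):
+//
+//	x := []complex128{1, 2, 3}
+//	Implementation{}.Zscal(len(x), 2i, x, 1)
+//	// x is now [2i 4i 6i]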
+func (Implementation) Zscal(n int, alpha complex128, x []complex128, incX int) { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return + } + if (n-1)*incX >= len(x) { + panic(shortX) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if alpha == 0 { + if incX == 1 { + x = x[:n] + for i := range x { + x[i] = 0 + } + return + } + for ix := 0; ix < n*incX; ix += incX { + x[ix] = 0 + } + return + } + if incX == 1 { + c128.ScalUnitary(alpha, x[:n]) + return + } + c128.ScalInc(alpha, x, uintptr(n), uintptr(incX)) +} + +// Zswap exchanges the elements of two complex vectors x and y. +func (Implementation) Zswap(n int, x []complex128, incX int, y []complex128, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(shortX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(shortY) + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, v := range x { + x[i], y[i] = y[i], v + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + x[ix], y[iy] = y[iy], x[ix] + ix += incX + iy += incY + } +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx64.go b/vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx64.go new file mode 100644 index 0000000000..249335cada --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx64.go @@ -0,0 +1,476 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. + +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + math "gonum.org/v1/gonum/internal/math32" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/c64" +) + +var _ blas.Complex64Level1 = Implementation{} + +// Scasum returns the sum of the absolute values of the elements of x +// +// \sum_i |Re(x[i])| + |Im(x[i])| +// +// Scasum returns 0 if incX is negative. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Scasum(n int, x []complex64, incX int) float32 { + if n < 0 { + panic(nLT0) + } + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return 0 + } + var sum float32 + if incX == 1 { + if len(x) < n { + panic(shortX) + } + for _, v := range x[:n] { + sum += scabs1(v) + } + return sum + } + if (n-1)*incX >= len(x) { + panic(shortX) + } + for i := 0; i < n; i++ { + v := x[i*incX] + sum += scabs1(v) + } + return sum +} + +// Scnrm2 computes the Euclidean norm of the complex vector x, +// +// ‖x‖_2 = sqrt(\sum_i x[i] * conj(x[i])). +// +// This function returns 0 if incX is negative. +// +// Complex64 implementations are autogenerated and not directly tested. 
+func (Implementation) Scnrm2(n int, x []complex64, incX int) float32 { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return 0 + } + if n < 1 { + if n == 0 { + return 0 + } + panic(nLT0) + } + if (n-1)*incX >= len(x) { + panic(shortX) + } + var ( + scale float32 + ssq float32 = 1 + ) + if incX == 1 { + for _, v := range x[:n] { + re, im := math.Abs(real(v)), math.Abs(imag(v)) + if re != 0 { + if re > scale { + ssq = 1 + ssq*(scale/re)*(scale/re) + scale = re + } else { + ssq += (re / scale) * (re / scale) + } + } + if im != 0 { + if im > scale { + ssq = 1 + ssq*(scale/im)*(scale/im) + scale = im + } else { + ssq += (im / scale) * (im / scale) + } + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(ssq) + } + for ix := 0; ix < n*incX; ix += incX { + re, im := math.Abs(real(x[ix])), math.Abs(imag(x[ix])) + if re != 0 { + if re > scale { + ssq = 1 + ssq*(scale/re)*(scale/re) + scale = re + } else { + ssq += (re / scale) * (re / scale) + } + } + if im != 0 { + if im > scale { + ssq = 1 + ssq*(scale/im)*(scale/im) + scale = im + } else { + ssq += (im / scale) * (im / scale) + } + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(ssq) +} + +// Icamax returns the index of the first element of x having largest |Re(·)|+|Im(·)|. +// Icamax returns -1 if n is 0 or incX is negative. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Icamax(n int, x []complex64, incX int) int { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + // Return invalid index. + return -1 + } + if n < 1 { + if n == 0 { + // Return invalid index. + return -1 + } + panic(nLT0) + } + if len(x) <= (n-1)*incX { + panic(shortX) + } + idx := 0 + max := scabs1(x[0]) + if incX == 1 { + for i, v := range x[1:n] { + absV := scabs1(v) + if absV > max { + max = absV + idx = i + 1 + } + } + return idx + } + ix := incX + for i := 1; i < n; i++ { + absV := scabs1(x[ix]) + if absV > max { + max = absV + idx = i + } + ix += incX + } + return idx +} + +// Caxpy adds alpha times x to y: +// +// y[i] += alpha * x[i] for all i +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(shortX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(shortY) + } + if alpha == 0 { + return + } + if incX == 1 && incY == 1 { + c64.AxpyUnitary(alpha, x[:n], y[:n]) + return + } + var ix, iy int + if incX < 0 { + ix = (1 - n) * incX + } + if incY < 0 { + iy = (1 - n) * incY + } + c64.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} + +// Ccopy copies the vector x to vector y. +// +// Complex64 implementations are autogenerated and not directly tested. 
+func (Implementation) Ccopy(n int, x []complex64, incX int, y []complex64, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(shortX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(shortY) + } + if incX == 1 && incY == 1 { + copy(y[:n], x[:n]) + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + y[iy] = x[ix] + ix += incX + iy += incY + } +} + +// Cdotc computes the dot product +// +// xᴴ · y +// +// of two complex vectors x and y. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) complex64 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(nLT0) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(shortX) + } + if len(y) < n { + panic(shortY) + } + return c64.DotcUnitary(x[:n], y[:n]) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || (n-1)*incX >= len(x) { + panic(shortX) + } + if iy >= len(y) || (n-1)*incY >= len(y) { + panic(shortY) + } + return c64.DotcInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} + +// Cdotu computes the dot product +// +// xᵀ · y +// +// of two complex vectors x and y. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) complex64 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(nLT0) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(shortX) + } + if len(y) < n { + panic(shortY) + } + return c64.DotuUnitary(x[:n], y[:n]) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || (n-1)*incX >= len(x) { + panic(shortX) + } + if iy >= len(y) || (n-1)*incY >= len(y) { + panic(shortY) + } + return c64.DotuInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} + +// Csscal scales the vector x by a real scalar alpha. +// Csscal has no effect if incX < 0. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Csscal(n int, alpha float32, x []complex64, incX int) { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return + } + if (n-1)*incX >= len(x) { + panic(shortX) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if alpha == 0 { + if incX == 1 { + x = x[:n] + for i := range x { + x[i] = 0 + } + return + } + for ix := 0; ix < n*incX; ix += incX { + x[ix] = 0 + } + return + } + if incX == 1 { + x = x[:n] + for i, v := range x { + x[i] = complex(alpha*real(v), alpha*imag(v)) + } + return + } + for ix := 0; ix < n*incX; ix += incX { + v := x[ix] + x[ix] = complex(alpha*real(v), alpha*imag(v)) + } +} + +// Cscal scales the vector x by a complex scalar alpha. +// Cscal has no effect if incX < 0. +// +// Complex64 implementations are autogenerated and not directly tested. 
+func (Implementation) Cscal(n int, alpha complex64, x []complex64, incX int) { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return + } + if (n-1)*incX >= len(x) { + panic(shortX) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if alpha == 0 { + if incX == 1 { + x = x[:n] + for i := range x { + x[i] = 0 + } + return + } + for ix := 0; ix < n*incX; ix += incX { + x[ix] = 0 + } + return + } + if incX == 1 { + c64.ScalUnitary(alpha, x[:n]) + return + } + c64.ScalInc(alpha, x, uintptr(n), uintptr(incX)) +} + +// Cswap exchanges the elements of two complex vectors x and y. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Cswap(n int, x []complex64, incX int, y []complex64, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(shortX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(shortY) + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, v := range x { + x[i], y[i] = y[i], v + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + x[ix], y[iy] = y[iy], x[ix] + ix += incX + iy += incY + } +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level1float32.go b/vendor/gonum.org/v1/gonum/blas/gonum/level1float32.go new file mode 100644 index 0000000000..a90b88aceb --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1float32.go @@ -0,0 +1,653 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. + +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + math "gonum.org/v1/gonum/internal/math32" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/f32" +) + +var _ blas.Float32Level1 = Implementation{} + +// Snrm2 computes the Euclidean norm of a vector, +// +// sqrt(\sum_i x[i] * x[i]). +// +// This function returns 0 if incX is negative. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Snrm2(n int, x []float32, incX int) float32 { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return 0 + } + if len(x) <= (n-1)*incX { + panic(shortX) + } + if n < 2 { + if n == 1 { + return math.Abs(x[0]) + } + if n == 0 { + return 0 + } + panic(nLT0) + } + if incX == 1 { + return f32.L2NormUnitary(x[:n]) + } + return f32.L2NormInc(x, uintptr(n), uintptr(incX)) +} + +// Sasum computes the sum of the absolute values of the elements of x. +// +// \sum_i |x[i]| +// +// Sasum returns 0 if incX is negative. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sasum(n int, x []float32, incX int) float32 { + var sum float32 + if n < 0 { + panic(nLT0) + } + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return 0 + } + if len(x) <= (n-1)*incX { + panic(shortX) + } + if incX == 1 { + x = x[:n] + for _, v := range x { + sum += math.Abs(v) + } + return sum + } + for i := 0; i < n; i++ { + sum += math.Abs(x[i*incX]) + } + return sum +} + +// Isamax returns the index of an element of x with the largest absolute value. +// If there are multiple such indices the earliest is returned. +// Isamax returns -1 if n == 0. 
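+// For example, Isamax(4, []float32{1, -3, 3, 2}, 1) returns 1: |-3| and
+// |3| tie on absolute value, and the earlier index wins.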
+// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Isamax(n int, x []float32, incX int) int { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return -1 + } + if len(x) <= (n-1)*incX { + panic(shortX) + } + if n < 2 { + if n == 1 { + return 0 + } + if n == 0 { + return -1 // Netlib returns invalid index when n == 0. + } + panic(nLT0) + } + idx := 0 + max := math.Abs(x[0]) + if incX == 1 { + for i, v := range x[:n] { + absV := math.Abs(v) + if absV > max { + max = absV + idx = i + } + } + return idx + } + ix := incX + for i := 1; i < n; i++ { + v := x[ix] + absV := math.Abs(v) + if absV > max { + max = absV + idx = i + } + ix += incX + } + return idx +} + +// Sswap exchanges the elements of two vectors. +// +// x[i], y[i] = y[i], x[i] for all i +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sswap(n int, x []float32, incX int, y []float32, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, v := range x { + x[i], y[i] = y[i], v + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + x[ix], y[iy] = y[iy], x[ix] + ix += incX + iy += incY + } +} + +// Scopy copies the elements of x into the elements of y. +// +// y[i] = x[i] for all i +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Scopy(n int, x []float32, incX int, y []float32, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if incX == 1 && incY == 1 { + copy(y[:n], x[:n]) + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + y[iy] = x[ix] + ix += incX + iy += incY + } +} + +// Saxpy adds alpha times x to y +// +// y[i] += alpha * x[i] for all i +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if alpha == 0 { + return + } + if incX == 1 && incY == 1 { + f32.AxpyUnitary(alpha, x[:n], y[:n]) + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + f32.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} + +// Srotg computes a plane rotation +// +// ⎡ c s ⎤ ⎡ a ⎤ = ⎡ r ⎤ +// ⎣ -s c ⎦ ⎣ b ⎦ ⎣ 0 ⎦ +// +// satisfying c^2 + s^2 = 1. 
+// +// The computation uses the formulas +// +// sigma = sgn(a) if |a| > |b| +// = sgn(b) if |b| >= |a| +// r = sigma*sqrt(a^2 + b^2) +// c = 1; s = 0 if r = 0 +// c = a/r; s = b/r if r != 0 +// c >= 0 if |a| > |b| +// +// The subroutine also computes +// +// z = s if |a| > |b|, +// = 1/c if |b| >= |a| and c != 0 +// = 1 if c = 0 +// +// This allows c and s to be reconstructed from z as follows: +// +// If z = 1, set c = 0, s = 1. +// If |z| < 1, set c = sqrt(1 - z^2) and s = z. +// If |z| > 1, set c = 1/z and s = sqrt(1 - c^2). +// +// NOTE: There is a discrepancy between the reference implementation and the +// BLAS technical manual regarding the sign for r when a or b are zero. Drotg +// agrees with the definition in the manual and other common BLAS +// implementations. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Srotg(a, b float32) (c, s, r, z float32) { + // Implementation based on Supplemental Material to: + // Edward Anderson. 2017. Algorithm 978: Safe Scaling in the Level 1 BLAS. + // ACM Trans. Math. Softw. 44, 1, Article 12 (July 2017), 28 pages. + // DOI: https://doi.org/10.1145/3061665 + const ( + safmin = 0x1p-126 + safmax = 1 / safmin + ) + anorm := math.Abs(a) + bnorm := math.Abs(b) + switch { + case bnorm == 0: + c = 1 + s = 0 + r = a + z = 0 + case anorm == 0: + c = 0 + s = 1 + r = b + z = 1 + default: + maxab := math.Max(anorm, bnorm) + scl := math.Min(math.Max(safmin, maxab), safmax) + var sigma float32 + if anorm > bnorm { + sigma = math.Copysign(1, a) + } else { + sigma = math.Copysign(1, b) + } + ascl := a / scl + bscl := b / scl + r = sigma * (scl * math.Sqrt(ascl*ascl+bscl*bscl)) + c = a / r + s = b / r + switch { + case anorm > bnorm: + z = s + case c != 0: + z = 1 / c + default: + z = 1 + } + } + return c, s, r, z +} + +// Srotmg computes the modified Givens rotation. See +// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html +// for more details. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Srotmg(d1, d2, x1, y1 float32) (p blas.SrotmParams, rd1, rd2, rx1 float32) { + // The implementation of Drotmg used here is taken from Hopkins 1997 + // Appendix A: https://doi.org/10.1145/289251.289253 + // with the exception of the gam constants below. + + const ( + gam = 4096.0 + gamsq = gam * gam + rgamsq = 1.0 / gamsq + ) + + if d1 < 0 { + p.Flag = blas.Rescaling // Error state. + return p, 0, 0, 0 + } + + if d2 == 0 || y1 == 0 { + p.Flag = blas.Identity + return p, d1, d2, x1 + } + + var h11, h12, h21, h22 float32 + if (d1 == 0 || x1 == 0) && d2 > 0 { + p.Flag = blas.Diagonal + h12 = 1 + h21 = -1 + x1 = y1 + d1, d2 = d2, d1 + } else { + p2 := d2 * y1 + p1 := d1 * x1 + q2 := p2 * y1 + q1 := p1 * x1 + if math.Abs(q1) > math.Abs(q2) { + p.Flag = blas.OffDiagonal + h11 = 1 + h22 = 1 + h21 = -y1 / x1 + h12 = p2 / p1 + u := 1 - float32(h12*h21) + if u <= 0 { + p.Flag = blas.Rescaling // Error state. + return p, 0, 0, 0 + } + + d1 /= u + d2 /= u + x1 *= u + } else { + if q2 < 0 { + p.Flag = blas.Rescaling // Error state. 
+ return p, 0, 0, 0 + } + + p.Flag = blas.Diagonal + h21 = -1 + h12 = 1 + h11 = p1 / p2 + h22 = x1 / y1 + u := 1 + float32(h11*h22) + d1, d2 = d2/u, d1/u + x1 = y1 * u + } + } + + for d1 <= rgamsq && d1 != 0 { + p.Flag = blas.Rescaling + d1 = (d1 * gam) * gam + x1 /= gam + h11 /= gam + h12 /= gam + } + for d1 > gamsq { + p.Flag = blas.Rescaling + d1 = (d1 / gam) / gam + x1 *= gam + h11 *= gam + h12 *= gam + } + + for math.Abs(d2) <= rgamsq && d2 != 0 { + p.Flag = blas.Rescaling + d2 = (d2 * gam) * gam + h21 /= gam + h22 /= gam + } + for math.Abs(d2) > gamsq { + p.Flag = blas.Rescaling + d2 = (d2 / gam) / gam + h21 *= gam + h22 *= gam + } + + switch p.Flag { + case blas.Diagonal: + p.H = [4]float32{0: h11, 3: h22} + case blas.OffDiagonal: + p.H = [4]float32{1: h21, 2: h12} + case blas.Rescaling: + p.H = [4]float32{h11, h21, h12, h22} + default: + panic(badFlag) + } + + return p, d1, d2, x1 +} + +// Srot applies a plane transformation. +// +// x[i] = c * x[i] + s * y[i] +// y[i] = c * y[i] - s * x[i] +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Srot(n int, x []float32, incX int, y []float32, incY int, c float32, s float32) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = c*vx+s*vy, c*vy-s*vx + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + vx := x[ix] + vy := y[iy] + x[ix], y[iy] = c*vx+s*vy, c*vy-s*vx + ix += incX + iy += incY + } +} + +// Srotm applies the modified Givens rotation to the 2×n matrix. +// +// Float32 implementations are autogenerated and not directly tested. 
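+//
+// The rows of the 2×n matrix are x and y; each column (x[i], y[i]) is
+// multiplied by the 2×2 matrix H encoded in p, with the unit and ±1
+// entries implied by p.Flag filled in implicitly:
+//
+//	x[i] = h11*x[i] + h12*y[i]
+//	y[i] = h21*x[i] + h22*y[i]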
+func (Implementation) Srotm(n int, x []float32, incX int, y []float32, incY int, p blas.SrotmParams) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + if p.Flag == blas.Identity { + return + } + + switch p.Flag { + case blas.Rescaling: + h11 := p.H[0] + h12 := p.H[2] + h21 := p.H[1] + h22 := p.H[3] + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = float32(vx*h11)+float32(vy*h12), float32(vx*h21)+float32(vy*h22) + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + vx := x[ix] + vy := y[iy] + x[ix], y[iy] = float32(vx*h11)+float32(vy*h12), float32(vx*h21)+float32(vy*h22) + ix += incX + iy += incY + } + case blas.OffDiagonal: + h12 := p.H[2] + h21 := p.H[1] + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = vx+float32(vy*h12), float32(vx*h21)+vy + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + vx := x[ix] + vy := y[iy] + x[ix], y[iy] = vx+float32(vy*h12), float32(vx*h21)+vy + ix += incX + iy += incY + } + case blas.Diagonal: + h11 := p.H[0] + h22 := p.H[3] + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = float32(vx*h11)+vy, -vx+float32(vy*h22) + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + vx := x[ix] + vy := y[iy] + x[ix], y[iy] = float32(vx*h11)+vy, -vx+float32(vy*h22) + ix += incX + iy += incY + } + } +} + +// Sscal scales x by alpha. +// +// x[i] *= alpha +// +// Sscal has no effect if incX < 0. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sscal(n int, alpha float32, x []float32, incX int) { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (n-1)*incX >= len(x) { + panic(shortX) + } + if alpha == 0 { + if incX == 1 { + x = x[:n] + for i := range x { + x[i] = 0 + } + return + } + for ix := 0; ix < n*incX; ix += incX { + x[ix] = 0 + } + return + } + if incX == 1 { + f32.ScalUnitary(alpha, x[:n]) + return + } + f32.ScalInc(alpha, x, uintptr(n), uintptr(incX)) +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level1float32_dsdot.go b/vendor/gonum.org/v1/gonum/blas/gonum/level1float32_dsdot.go new file mode 100644 index 0000000000..cd7df4110a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1float32_dsdot.go @@ -0,0 +1,54 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. + +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/internal/asm/f32" +) + +// Dsdot computes the dot product of the two vectors +// +// \sum_i x[i]*y[i] +// +// Float32 implementations are autogenerated and not directly tested. 
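+//
+// Although the inputs are float32 vectors, each product and the running
+// sum are accumulated in float64, so Dsdot loses less precision than
+// Sdot on long or ill-conditioned sums.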
+func (Implementation) Dsdot(n int, x []float32, incX int, y []float32, incY int) float64 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(nLT0) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(shortX) + } + if len(y) < n { + panic(shortY) + } + return f32.DdotUnitary(x[:n], y[:n]) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || ix+(n-1)*incX >= len(x) { + panic(shortX) + } + if iy >= len(y) || iy+(n-1)*incY >= len(y) { + panic(shortY) + } + return f32.DdotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level1float32_sdot.go b/vendor/gonum.org/v1/gonum/blas/gonum/level1float32_sdot.go new file mode 100644 index 0000000000..c4cc166322 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1float32_sdot.go @@ -0,0 +1,54 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. + +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/internal/asm/f32" +) + +// Sdot computes the dot product of the two vectors +// +// \sum_i x[i]*y[i] +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sdot(n int, x []float32, incX int, y []float32, incY int) float32 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(nLT0) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(shortX) + } + if len(y) < n { + panic(shortY) + } + return f32.DotUnitary(x[:n], y[:n]) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || ix+(n-1)*incX >= len(x) { + panic(shortX) + } + if iy >= len(y) || iy+(n-1)*incY >= len(y) { + panic(shortY) + } + return f32.DotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level1float32_sdsdot.go b/vendor/gonum.org/v1/gonum/blas/gonum/level1float32_sdsdot.go new file mode 100644 index 0000000000..eb6b73bd41 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1float32_sdsdot.go @@ -0,0 +1,54 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. + +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/internal/asm/f32" +) + +// Sdsdot computes the dot product of the two vectors plus a constant +// +// alpha + \sum_i x[i]*y[i] +// +// Float32 implementations are autogenerated and not directly tested. 
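+// NOTE (editorial sketch; not part of the upstream gonum sources): Sdsdot
+// is the float32-returning sibling of Dsdot above: the dot product is
+// accumulated in float64, rounded once to float32, and alpha is then added
+// in float32. With x = {1, 2, 3}, y = {4, 5, 6} and alpha = 1,
+// Sdsdot(3, 1, x, 1, y, 1) returns 33.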
+func (Implementation) Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(nLT0) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(shortX) + } + if len(y) < n { + panic(shortY) + } + return alpha + float32(f32.DdotUnitary(x[:n], y[:n])) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || ix+(n-1)*incX >= len(x) { + panic(shortX) + } + if iy >= len(y) || iy+(n-1)*incY >= len(y) { + panic(shortY) + } + return alpha + float32(f32.DdotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))) +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level1float64.go b/vendor/gonum.org/v1/gonum/blas/gonum/level1float64.go new file mode 100644 index 0000000000..795769d966 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1float64.go @@ -0,0 +1,629 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/f64" +) + +var _ blas.Float64Level1 = Implementation{} + +// Dnrm2 computes the Euclidean norm of a vector, +// +// sqrt(\sum_i x[i] * x[i]). +// +// This function returns 0 if incX is negative. +func (Implementation) Dnrm2(n int, x []float64, incX int) float64 { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return 0 + } + if len(x) <= (n-1)*incX { + panic(shortX) + } + if n < 2 { + if n == 1 { + return math.Abs(x[0]) + } + if n == 0 { + return 0 + } + panic(nLT0) + } + if incX == 1 { + return f64.L2NormUnitary(x[:n]) + } + return f64.L2NormInc(x, uintptr(n), uintptr(incX)) +} + +// Dasum computes the sum of the absolute values of the elements of x. +// +// \sum_i |x[i]| +// +// Dasum returns 0 if incX is negative. +func (Implementation) Dasum(n int, x []float64, incX int) float64 { + var sum float64 + if n < 0 { + panic(nLT0) + } + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return 0 + } + if len(x) <= (n-1)*incX { + panic(shortX) + } + if incX == 1 { + x = x[:n] + for _, v := range x { + sum += math.Abs(v) + } + return sum + } + for i := 0; i < n; i++ { + sum += math.Abs(x[i*incX]) + } + return sum +} + +// Idamax returns the index of an element of x with the largest absolute value. +// If there are multiple such indices the earliest is returned. +// Idamax returns -1 if n == 0. +func (Implementation) Idamax(n int, x []float64, incX int) int { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return -1 + } + if len(x) <= (n-1)*incX { + panic(shortX) + } + if n < 2 { + if n == 1 { + return 0 + } + if n == 0 { + return -1 // Netlib returns invalid index when n == 0. + } + panic(nLT0) + } + idx := 0 + max := math.Abs(x[0]) + if incX == 1 { + for i, v := range x[:n] { + absV := math.Abs(v) + if absV > max { + max = absV + idx = i + } + } + return idx + } + ix := incX + for i := 1; i < n; i++ { + v := x[ix] + absV := math.Abs(v) + if absV > max { + max = absV + idx = i + } + ix += incX + } + return idx +} + +// Dswap exchanges the elements of two vectors. 
+// +// x[i], y[i] = y[i], x[i] for all i +func (Implementation) Dswap(n int, x []float64, incX int, y []float64, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, v := range x { + x[i], y[i] = y[i], v + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + x[ix], y[iy] = y[iy], x[ix] + ix += incX + iy += incY + } +} + +// Dcopy copies the elements of x into the elements of y. +// +// y[i] = x[i] for all i +func (Implementation) Dcopy(n int, x []float64, incX int, y []float64, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if incX == 1 && incY == 1 { + copy(y[:n], x[:n]) + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + y[iy] = x[ix] + ix += incX + iy += incY + } +} + +// Daxpy adds alpha times x to y +// +// y[i] += alpha * x[i] for all i +func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if alpha == 0 { + return + } + if incX == 1 && incY == 1 { + f64.AxpyUnitary(alpha, x[:n], y[:n]) + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + f64.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} + +// Drotg computes a plane rotation +// +// ⎡ c s ⎤ ⎡ a ⎤ = ⎡ r ⎤ +// ⎣ -s c ⎦ ⎣ b ⎦ ⎣ 0 ⎦ +// +// satisfying c^2 + s^2 = 1. +// +// The computation uses the formulas +// +// sigma = sgn(a) if |a| > |b| +// = sgn(b) if |b| >= |a| +// r = sigma*sqrt(a^2 + b^2) +// c = 1; s = 0 if r = 0 +// c = a/r; s = b/r if r != 0 +// c >= 0 if |a| > |b| +// +// The subroutine also computes +// +// z = s if |a| > |b|, +// = 1/c if |b| >= |a| and c != 0 +// = 1 if c = 0 +// +// This allows c and s to be reconstructed from z as follows: +// +// If z = 1, set c = 0, s = 1. +// If |z| < 1, set c = sqrt(1 - z^2) and s = z. +// If |z| > 1, set c = 1/z and s = sqrt(1 - c^2). +// +// NOTE: There is a discrepancy between the reference implementation and the +// BLAS technical manual regarding the sign for r when a or b are zero. Drotg +// agrees with the definition in the manual and other common BLAS +// implementations. +func (Implementation) Drotg(a, b float64) (c, s, r, z float64) { + // Implementation based on Supplemental Material to: + // Edward Anderson. 2017. Algorithm 978: Safe Scaling in the Level 1 BLAS. + // ACM Trans. Math. Softw. 44, 1, Article 12 (July 2017), 28 pages. 
+ // DOI: https://doi.org/10.1145/3061665 + const ( + safmin = 0x1p-1022 + safmax = 1 / safmin + ) + anorm := math.Abs(a) + bnorm := math.Abs(b) + switch { + case bnorm == 0: + c = 1 + s = 0 + r = a + z = 0 + case anorm == 0: + c = 0 + s = 1 + r = b + z = 1 + default: + maxab := math.Max(anorm, bnorm) + scl := math.Min(math.Max(safmin, maxab), safmax) + var sigma float64 + if anorm > bnorm { + sigma = math.Copysign(1, a) + } else { + sigma = math.Copysign(1, b) + } + ascl := a / scl + bscl := b / scl + r = sigma * (scl * math.Sqrt(ascl*ascl+bscl*bscl)) + c = a / r + s = b / r + switch { + case anorm > bnorm: + z = s + case c != 0: + z = 1 / c + default: + z = 1 + } + } + return c, s, r, z +} + +// Drotmg computes the modified Givens rotation. See +// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html +// for more details. +func (Implementation) Drotmg(d1, d2, x1, y1 float64) (p blas.DrotmParams, rd1, rd2, rx1 float64) { + // The implementation of Drotmg used here is taken from Hopkins 1997 + // Appendix A: https://doi.org/10.1145/289251.289253 + // with the exception of the gam constants below. + + const ( + gam = 4096.0 + gamsq = gam * gam + rgamsq = 1.0 / gamsq + ) + + if d1 < 0 { + p.Flag = blas.Rescaling // Error state. + return p, 0, 0, 0 + } + + if d2 == 0 || y1 == 0 { + p.Flag = blas.Identity + return p, d1, d2, x1 + } + + var h11, h12, h21, h22 float64 + if (d1 == 0 || x1 == 0) && d2 > 0 { + p.Flag = blas.Diagonal + h12 = 1 + h21 = -1 + x1 = y1 + d1, d2 = d2, d1 + } else { + p2 := d2 * y1 + p1 := d1 * x1 + q2 := p2 * y1 + q1 := p1 * x1 + if math.Abs(q1) > math.Abs(q2) { + p.Flag = blas.OffDiagonal + h11 = 1 + h22 = 1 + h21 = -y1 / x1 + h12 = p2 / p1 + u := 1 - float64(h12*h21) + if u <= 0 { + p.Flag = blas.Rescaling // Error state. + return p, 0, 0, 0 + } + + d1 /= u + d2 /= u + x1 *= u + } else { + if q2 < 0 { + p.Flag = blas.Rescaling // Error state. + return p, 0, 0, 0 + } + + p.Flag = blas.Diagonal + h21 = -1 + h12 = 1 + h11 = p1 / p2 + h22 = x1 / y1 + u := 1 + float64(h11*h22) + d1, d2 = d2/u, d1/u + x1 = y1 * u + } + } + + for d1 <= rgamsq && d1 != 0 { + p.Flag = blas.Rescaling + d1 = (d1 * gam) * gam + x1 /= gam + h11 /= gam + h12 /= gam + } + for d1 > gamsq { + p.Flag = blas.Rescaling + d1 = (d1 / gam) / gam + x1 *= gam + h11 *= gam + h12 *= gam + } + + for math.Abs(d2) <= rgamsq && d2 != 0 { + p.Flag = blas.Rescaling + d2 = (d2 * gam) * gam + h21 /= gam + h22 /= gam + } + for math.Abs(d2) > gamsq { + p.Flag = blas.Rescaling + d2 = (d2 / gam) / gam + h21 *= gam + h22 *= gam + } + + switch p.Flag { + case blas.Diagonal: + p.H = [4]float64{0: h11, 3: h22} + case blas.OffDiagonal: + p.H = [4]float64{1: h21, 2: h12} + case blas.Rescaling: + p.H = [4]float64{h11, h21, h12, h22} + default: + panic(badFlag) + } + + return p, d1, d2, x1 +} + +// Drot applies a plane transformation. 
+// +// x[i] = c * x[i] + s * y[i] +// y[i] = c * y[i] - s * x[i] +func (Implementation) Drot(n int, x []float64, incX int, y []float64, incY int, c float64, s float64) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = c*vx+s*vy, c*vy-s*vx + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + vx := x[ix] + vy := y[iy] + x[ix], y[iy] = c*vx+s*vy, c*vy-s*vx + ix += incX + iy += incY + } +} + +// Drotm applies the modified Givens rotation to the 2×n matrix. +func (Implementation) Drotm(n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return + } + panic(nLT0) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + if p.Flag == blas.Identity { + return + } + + switch p.Flag { + case blas.Rescaling: + h11 := p.H[0] + h12 := p.H[2] + h21 := p.H[1] + h22 := p.H[3] + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = float64(vx*h11)+float64(vy*h12), float64(vx*h21)+float64(vy*h22) + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + vx := x[ix] + vy := y[iy] + x[ix], y[iy] = float64(vx*h11)+float64(vy*h12), float64(vx*h21)+float64(vy*h22) + ix += incX + iy += incY + } + case blas.OffDiagonal: + h12 := p.H[2] + h21 := p.H[1] + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = vx+float64(vy*h12), float64(vx*h21)+vy + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + vx := x[ix] + vy := y[iy] + x[ix], y[iy] = vx+float64(vy*h12), float64(vx*h21)+vy + ix += incX + iy += incY + } + case blas.Diagonal: + h11 := p.H[0] + h22 := p.H[3] + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = float64(vx*h11)+vy, -vx+float64(vy*h22) + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + vx := x[ix] + vy := y[iy] + x[ix], y[iy] = float64(vx*h11)+vy, -vx+float64(vy*h22) + ix += incX + iy += incY + } + } +} + +// Dscal scales x by alpha. +// +// x[i] *= alpha +// +// Dscal has no effect if incX < 0. 
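+// NOTE (editorial sketch; not part of the upstream gonum sources): only
+// every incX-th element is touched, and a negative incX is a documented
+// no-op rather than a reverse traversal. For example:
+//
+//	var impl gonum.Implementation
+//	x := []float64{1, 2, 3, 4}
+//	impl.Dscal(2, 10, x, 2) // x is now {10, 2, 30, 4}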
+func (Implementation) Dscal(n int, alpha float64, x []float64, incX int) { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return + } + if n < 1 { + if n == 0 { + return + } + panic(nLT0) + } + if (n-1)*incX >= len(x) { + panic(shortX) + } + if alpha == 0 { + if incX == 1 { + x = x[:n] + for i := range x { + x[i] = 0 + } + return + } + for ix := 0; ix < n*incX; ix += incX { + x[ix] = 0 + } + return + } + if incX == 1 { + f64.ScalUnitary(alpha, x[:n]) + return + } + f64.ScalInc(alpha, x, uintptr(n), uintptr(incX)) +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level1float64_ddot.go b/vendor/gonum.org/v1/gonum/blas/gonum/level1float64_ddot.go new file mode 100644 index 0000000000..1569656ef2 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1float64_ddot.go @@ -0,0 +1,50 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/internal/asm/f64" +) + +// Ddot computes the dot product of the two vectors +// +// \sum_i x[i]*y[i] +func (Implementation) Ddot(n int, x []float64, incX int, y []float64, incY int) float64 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(nLT0) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(shortX) + } + if len(y) < n { + panic(shortY) + } + return f64.DotUnitary(x[:n], y[:n]) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || ix+(n-1)*incX >= len(x) { + panic(shortX) + } + if iy >= len(y) || iy+(n-1)*incY >= len(y) { + panic(shortY) + } + return f64.DotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx128.go b/vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx128.go new file mode 100644 index 0000000000..fa076d5fb1 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx128.go @@ -0,0 +1,2940 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math/cmplx" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/c128" +) + +var _ blas.Complex128Level2 = Implementation{} + +// Zgbmv performs one of the matrix-vector operations +// +// y = alpha * A * x + beta * y if trans = blas.NoTrans +// y = alpha * Aᵀ * x + beta * y if trans = blas.Trans +// y = alpha * Aᴴ * x + beta * y if trans = blas.ConjTrans +// +// where alpha and beta are scalars, x and y are vectors, and A is an m×n band matrix +// with kL sub-diagonals and kU super-diagonals. +func (Implementation) Zgbmv(trans blas.Transpose, m, n, kL, kU int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if kL < 0 { + panic(kLLT0) + } + if kU < 0 { + panic(kULT0) + } + if lda < kL+kU+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
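+	// NOTE (editorial sketch; not part of the upstream gonum sources): in
+	// this row-major band layout the in-band element A(i,j) lives at
+	// a[i*lda+j-i+kL], so row i occupies the kL+kU+1 entries starting at
+	// a[i*lda], and only the first min(m, n+kL) rows can hold band
+	// elements; the length check below requires exactly that much storage.
+	// For a 4×4 tridiagonal matrix (kL = kU = 1, lda = 3) the rows of a are
+	//
+	//	[ *  a00 a01]
+	//	[a10 a11 a12]
+	//	[a21 a22 a23]
+	//	[a32 a33  * ]
+	//
+	// where * marks padding that is never read.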
+ if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 { + panic(shortA) + } + var lenX, lenY int + if trans == blas.NoTrans { + lenX, lenY = n, m + } else { + lenX, lenY = m, n + } + if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + var kx int + if incX < 0 { + kx = (1 - lenX) * incX + } + var ky int + if incY < 0 { + ky = (1 - lenY) * incY + } + + // Form y = beta*y. + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:lenY] { + y[i] = 0 + } + } else { + c128.ScalUnitary(beta, y[:lenY]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < lenY; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + c128.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) + } else { + c128.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) + } + } + } + } + + nRow := min(m, n+kL) + nCol := kL + 1 + kU + switch trans { + case blas.NoTrans: + iy := ky + if incX == 1 { + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) + xtmp := x[off : off+u-l] + var sum complex128 + for j, v := range aRow { + sum += xtmp[j] * v + } + y[iy] += alpha * sum + iy += incY + } + } else { + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) * incX + jx := kx + var sum complex128 + for _, v := range aRow { + sum += x[off+jx] * v + jx += incX + } + y[iy] += alpha * sum + iy += incY + } + } + case blas.Trans: + if incX == 1 { + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) * incY + alphaxi := alpha * x[i] + jy := ky + for _, v := range aRow { + y[off+jy] += alphaxi * v + jy += incY + } + } + } else { + ix := kx + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) * incY + alphaxi := alpha * x[ix] + jy := ky + for _, v := range aRow { + y[off+jy] += alphaxi * v + jy += incY + } + ix += incX + } + } + case blas.ConjTrans: + if incX == 1 { + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) * incY + alphaxi := alpha * x[i] + jy := ky + for _, v := range aRow { + y[off+jy] += alphaxi * cmplx.Conj(v) + jy += incY + } + } + } else { + ix := kx + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) * incY + alphaxi := alpha * x[ix] + jy := ky + for _, v := range aRow { + y[off+jy] += alphaxi * cmplx.Conj(v) + jy += incY + } + ix += incX + } + } + } +} + +// Zgemv performs one of the matrix-vector operations +// +// y = alpha * A * x + beta * y if trans = blas.NoTrans +// y = alpha * Aᵀ * x + beta * y if trans = blas.Trans +// y = alpha * Aᴴ * x + beta * y if trans = blas.ConjTrans +// +// where alpha and beta are scalars, x and y are vectors, and A is an m×n dense matrix. 
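+// NOTE (editorial sketch; not part of the upstream gonum sources): a
+// minimal use, assuming this package is imported under its package name
+// gonum and gonum.org/v1/gonum/blas as blas:
+//
+//	var impl gonum.Implementation
+//	a := []complex128{1, 2, 3, 4} // 2×2 row-major: [1 2; 3 4]
+//	x := []complex128{1, 1}
+//	y := make([]complex128, 2)
+//	impl.Zgemv(blas.NoTrans, 2, 2, 1, a, 2, x, 1, 0, y, 1)
+//	// y is now {3, 7}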
+func (Implementation) Zgemv(trans blas.Transpose, m, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + var lenX, lenY int + if trans == blas.NoTrans { + lenX = n + lenY = m + } else { + lenX = m + lenY = n + } + if len(a) < lda*(m-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + var kx int + if incX < 0 { + kx = (1 - lenX) * incX + } + var ky int + if incY < 0 { + ky = (1 - lenY) * incY + } + + // Form y = beta*y. + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:lenY] { + y[i] = 0 + } + } else { + c128.ScalUnitary(beta, y[:lenY]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < lenY; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + c128.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) + } else { + c128.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) + } + } + } + } + + if alpha == 0 { + return + } + + switch trans { + default: + // Form y = alpha*A*x + y. + iy := ky + if incX == 1 { + for i := 0; i < m; i++ { + y[iy] += alpha * c128.DotuUnitary(a[i*lda:i*lda+n], x[:n]) + iy += incY + } + return + } + for i := 0; i < m; i++ { + y[iy] += alpha * c128.DotuInc(a[i*lda:i*lda+n], x, uintptr(n), 1, uintptr(incX), 0, uintptr(kx)) + iy += incY + } + return + + case blas.Trans: + // Form y = alpha*Aᵀ*x + y. + ix := kx + if incY == 1 { + for i := 0; i < m; i++ { + c128.AxpyUnitary(alpha*x[ix], a[i*lda:i*lda+n], y[:n]) + ix += incX + } + return + } + for i := 0; i < m; i++ { + c128.AxpyInc(alpha*x[ix], a[i*lda:i*lda+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky)) + ix += incX + } + return + + case blas.ConjTrans: + // Form y = alpha*Aᴴ*x + y. + ix := kx + if incY == 1 { + for i := 0; i < m; i++ { + tmp := alpha * x[ix] + for j := 0; j < n; j++ { + y[j] += tmp * cmplx.Conj(a[i*lda+j]) + } + ix += incX + } + return + } + for i := 0; i < m; i++ { + tmp := alpha * x[ix] + jy := ky + for j := 0; j < n; j++ { + y[jy] += tmp * cmplx.Conj(a[i*lda+j]) + jy += incY + } + ix += incX + } + return + } +} + +// Zgerc performs the rank-one operation +// +// A += alpha * x * yᴴ +// +// where A is an m×n dense matrix, alpha is a scalar, x is an m element vector, +// and y is an n element vector. +func (Implementation) Zgerc(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) { + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
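+	// NOTE (editorial sketch; not part of the upstream gonum sources): the
+	// stride checks below encode the BLAS length convention that a vector
+	// of n logical elements with increment inc needs at least
+	// 1+(n-1)*|inc| entries. A negative increment walks the vector
+	// backwards starting from offset (1-n)*inc, which is why the second
+	// disjunct of each check compares against (1-n)*inc.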
+ if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(a) < lda*(m-1)+n { + panic(shortA) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + var kx, jy int + if incX < 0 { + kx = (1 - m) * incX + } + if incY < 0 { + jy = (1 - n) * incY + } + for j := 0; j < n; j++ { + if y[jy] != 0 { + tmp := alpha * cmplx.Conj(y[jy]) + c128.AxpyInc(tmp, x, a[j:], uintptr(m), uintptr(incX), uintptr(lda), uintptr(kx), 0) + } + jy += incY + } +} + +// Zgeru performs the rank-one operation +// +// A += alpha * x * yᵀ +// +// where A is an m×n dense matrix, alpha is a scalar, x is an m element vector, +// and y is an n element vector. +func (Implementation) Zgeru(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) { + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(a) < lda*(m-1)+n { + panic(shortA) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + var kx int + if incX < 0 { + kx = (1 - m) * incX + } + if incY == 1 { + for i := 0; i < m; i++ { + if x[kx] != 0 { + tmp := alpha * x[kx] + c128.AxpyUnitary(tmp, y[:n], a[i*lda:i*lda+n]) + } + kx += incX + } + return + } + var jy int + if incY < 0 { + jy = (1 - n) * incY + } + for i := 0; i < m; i++ { + if x[kx] != 0 { + tmp := alpha * x[kx] + c128.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(jy), 0) + } + kx += incX + } +} + +// Zhbmv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where alpha and beta are scalars, x and y are vectors, and A is an n×n +// Hermitian band matrix with k super-diagonals. The imaginary parts of +// the diagonal elements of A are ignored and assumed to be zero. +func (Implementation) Zhbmv(uplo blas.Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up the start indices in X and Y. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + var ky int + if incY < 0 { + ky = (1 - n) * incY + } + + // Form y = beta*y. 
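+	// NOTE (editorial sketch; not part of the upstream gonum sources): the
+	// beta pass is kept separate from the alpha accumulation so that
+	// beta == 0 overwrites y without ever reading it (y need not hold
+	// finite values on entry, per BLAS convention), while beta == 1 skips
+	// the pass entirely.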
+ if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + for i, v := range y[:n] { + y[i] = beta * v + } + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + for i := 0; i < n; i++ { + y[iy] = beta * y[iy] + iy += incY + } + } + } + } + + if alpha == 0 { + return + } + + // The elements of A are accessed sequentially with one pass through a. + switch uplo { + case blas.Upper: + iy := ky + if incX == 1 { + for i := 0; i < n; i++ { + aRow := a[i*lda:] + alphaxi := alpha * x[i] + sum := alphaxi * complex(real(aRow[0]), 0) + u := min(k+1, n-i) + jy := incY + for j := 1; j < u; j++ { + v := aRow[j] + sum += alpha * x[i+j] * v + y[iy+jy] += alphaxi * cmplx.Conj(v) + jy += incY + } + y[iy] += sum + iy += incY + } + } else { + ix := kx + for i := 0; i < n; i++ { + aRow := a[i*lda:] + alphaxi := alpha * x[ix] + sum := alphaxi * complex(real(aRow[0]), 0) + u := min(k+1, n-i) + jx := incX + jy := incY + for j := 1; j < u; j++ { + v := aRow[j] + sum += alpha * x[ix+jx] * v + y[iy+jy] += alphaxi * cmplx.Conj(v) + jx += incX + jy += incY + } + y[iy] += sum + ix += incX + iy += incY + } + } + case blas.Lower: + iy := ky + if incX == 1 { + for i := 0; i < n; i++ { + l := max(0, k-i) + alphaxi := alpha * x[i] + jy := l * incY + aRow := a[i*lda:] + for j := l; j < k; j++ { + v := aRow[j] + y[iy] += alpha * v * x[i-k+j] + y[iy-k*incY+jy] += alphaxi * cmplx.Conj(v) + jy += incY + } + y[iy] += alphaxi * complex(real(aRow[k]), 0) + iy += incY + } + } else { + ix := kx + for i := 0; i < n; i++ { + l := max(0, k-i) + alphaxi := alpha * x[ix] + jx := l * incX + jy := l * incY + aRow := a[i*lda:] + for j := l; j < k; j++ { + v := aRow[j] + y[iy] += alpha * v * x[ix-k*incX+jx] + y[iy-k*incY+jy] += alphaxi * cmplx.Conj(v) + jx += incX + jy += incY + } + y[iy] += alphaxi * complex(real(aRow[k]), 0) + ix += incX + iy += incY + } + } + } +} + +// Zhemv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where alpha and beta are scalars, x and y are vectors, and A is an n×n +// Hermitian matrix. The imaginary parts of the diagonal elements of A are +// ignored and assumed to be zero. +func (Implementation) Zhemv(uplo blas.Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up the start indices in X and Y. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + var ky int + if incY < 0 { + ky = (1 - n) * incY + } + + // Form y = beta*y. 
+ if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + for i, v := range y[:n] { + y[i] = beta * v + } + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + for i := 0; i < n; i++ { + y[iy] = beta * y[iy] + iy += incY + } + } + } + } + + if alpha == 0 { + return + } + + // The elements of A are accessed sequentially with one pass through + // the triangular part of A. + + if uplo == blas.Upper { + // Form y when A is stored in upper triangle. + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + tmp1 := alpha * x[i] + var tmp2 complex128 + for j := i + 1; j < n; j++ { + y[j] += tmp1 * cmplx.Conj(a[i*lda+j]) + tmp2 += a[i*lda+j] * x[j] + } + aii := complex(real(a[i*lda+i]), 0) + y[i] += tmp1*aii + alpha*tmp2 + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + tmp1 := alpha * x[ix] + var tmp2 complex128 + jx := ix + jy := iy + for j := i + 1; j < n; j++ { + jx += incX + jy += incY + y[jy] += tmp1 * cmplx.Conj(a[i*lda+j]) + tmp2 += a[i*lda+j] * x[jx] + } + aii := complex(real(a[i*lda+i]), 0) + y[iy] += tmp1*aii + alpha*tmp2 + ix += incX + iy += incY + } + } + return + } + + // Form y when A is stored in lower triangle. + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + tmp1 := alpha * x[i] + var tmp2 complex128 + for j := 0; j < i; j++ { + y[j] += tmp1 * cmplx.Conj(a[i*lda+j]) + tmp2 += a[i*lda+j] * x[j] + } + aii := complex(real(a[i*lda+i]), 0) + y[i] += tmp1*aii + alpha*tmp2 + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + tmp1 := alpha * x[ix] + var tmp2 complex128 + jx := kx + jy := ky + for j := 0; j < i; j++ { + y[jy] += tmp1 * cmplx.Conj(a[i*lda+j]) + tmp2 += a[i*lda+j] * x[jx] + jx += incX + jy += incY + } + aii := complex(real(a[i*lda+i]), 0) + y[iy] += tmp1*aii + alpha*tmp2 + ix += incX + iy += incY + } + } +} + +// Zher performs the Hermitian rank-one operation +// +// A += alpha * x * xᴴ +// +// where A is an n×n Hermitian matrix, alpha is a real scalar, and x is an n +// element vector. On entry, the imaginary parts of the diagonal elements of A +// are ignored and assumed to be zero, on return they will be set to zero. +func (Implementation) Zher(uplo blas.Uplo, n int, alpha float64, x []complex128, incX int, a []complex128, lda int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if len(a) < lda*(n-1)+n { + panic(shortA) + } + + // Quick return if possible. 
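+	// NOTE (editorial sketch; not part of the upstream gonum sources):
+	// returning here for alpha == 0 leaves A completely untouched, so the
+	// imaginary parts of its diagonal are only forced to zero on the
+	// update paths further down.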
+ if alpha == 0 { + return + } + + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + if uplo == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 { + tmp := complex(alpha*real(x[i]), alpha*imag(x[i])) + aii := real(a[i*lda+i]) + xtmp := real(tmp * cmplx.Conj(x[i])) + a[i*lda+i] = complex(aii+xtmp, 0) + for j := i + 1; j < n; j++ { + a[i*lda+j] += tmp * cmplx.Conj(x[j]) + } + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + } + return + } + + ix := kx + for i := 0; i < n; i++ { + if x[ix] != 0 { + tmp := complex(alpha*real(x[ix]), alpha*imag(x[ix])) + aii := real(a[i*lda+i]) + xtmp := real(tmp * cmplx.Conj(x[ix])) + a[i*lda+i] = complex(aii+xtmp, 0) + jx := ix + incX + for j := i + 1; j < n; j++ { + a[i*lda+j] += tmp * cmplx.Conj(x[jx]) + jx += incX + } + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + ix += incX + } + return + } + + if incX == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 { + tmp := complex(alpha*real(x[i]), alpha*imag(x[i])) + for j := 0; j < i; j++ { + a[i*lda+j] += tmp * cmplx.Conj(x[j]) + } + aii := real(a[i*lda+i]) + xtmp := real(tmp * cmplx.Conj(x[i])) + a[i*lda+i] = complex(aii+xtmp, 0) + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + } + return + } + + ix := kx + for i := 0; i < n; i++ { + if x[ix] != 0 { + tmp := complex(alpha*real(x[ix]), alpha*imag(x[ix])) + jx := kx + for j := 0; j < i; j++ { + a[i*lda+j] += tmp * cmplx.Conj(x[jx]) + jx += incX + } + aii := real(a[i*lda+i]) + xtmp := real(tmp * cmplx.Conj(x[ix])) + a[i*lda+i] = complex(aii+xtmp, 0) + + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + ix += incX + } +} + +// Zher2 performs the Hermitian rank-two operation +// +// A += alpha * x * yᴴ + conj(alpha) * y * xᴴ +// +// where alpha is a scalar, x and y are n element vectors and A is an n×n +// Hermitian matrix. On entry, the imaginary parts of the diagonal elements are +// ignored and assumed to be zero. On return they will be set to zero. +func (Implementation) Zher2(uplo blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(a) < lda*(n-1)+n { + panic(shortA) + } + + // Quick return if possible. 
+ if alpha == 0 { + return + } + + var kx, ky int + var ix, iy int + if incX != 1 || incY != 1 { + if incX < 0 { + kx = (1 - n) * incX + } + if incY < 0 { + ky = (1 - n) * incY + } + ix = kx + iy = ky + } + if uplo == blas.Upper { + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 || y[i] != 0 { + tmp1 := alpha * x[i] + tmp2 := cmplx.Conj(alpha) * y[i] + aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i])) + a[i*lda+i] = complex(aii, 0) + for j := i + 1; j < n; j++ { + a[i*lda+j] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j]) + } + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + } + return + } + for i := 0; i < n; i++ { + if x[ix] != 0 || y[iy] != 0 { + tmp1 := alpha * x[ix] + tmp2 := cmplx.Conj(alpha) * y[iy] + aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix])) + a[i*lda+i] = complex(aii, 0) + jx := ix + incX + jy := iy + incY + for j := i + 1; j < n; j++ { + a[i*lda+j] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx]) + jx += incX + jy += incY + } + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + ix += incX + iy += incY + } + return + } + + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 || y[i] != 0 { + tmp1 := alpha * x[i] + tmp2 := cmplx.Conj(alpha) * y[i] + for j := 0; j < i; j++ { + a[i*lda+j] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j]) + } + aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i])) + a[i*lda+i] = complex(aii, 0) + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + } + return + } + for i := 0; i < n; i++ { + if x[ix] != 0 || y[iy] != 0 { + tmp1 := alpha * x[ix] + tmp2 := cmplx.Conj(alpha) * y[iy] + jx := kx + jy := ky + for j := 0; j < i; j++ { + a[i*lda+j] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx]) + jx += incX + jy += incY + } + aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix])) + a[i*lda+i] = complex(aii, 0) + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + ix += incX + iy += incY + } +} + +// Zhpmv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where alpha and beta are scalars, x and y are vectors, and A is an n×n +// Hermitian matrix in packed form. The imaginary parts of the diagonal +// elements of A are ignored and assumed to be zero. +func (Implementation) Zhpmv(uplo blas.Uplo, n int, alpha complex128, ap []complex128, x []complex128, incX int, beta complex128, y []complex128, incY int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up the start indices in X and Y. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + var ky int + if incY < 0 { + ky = (1 - n) * incY + } + + // Form y = beta*y. 
+ if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + for i, v := range y[:n] { + y[i] = beta * v + } + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + for i := 0; i < n; i++ { + y[iy] *= beta + iy += incY + } + } + } + } + + if alpha == 0 { + return + } + + // The elements of A are accessed sequentially with one pass through ap. + + var kk int + if uplo == blas.Upper { + // Form y when ap contains the upper triangle. + // Here, kk points to the current diagonal element in ap. + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + tmp1 := alpha * x[i] + y[i] += tmp1 * complex(real(ap[kk]), 0) + var tmp2 complex128 + k := kk + 1 + for j := i + 1; j < n; j++ { + y[j] += tmp1 * cmplx.Conj(ap[k]) + tmp2 += ap[k] * x[j] + k++ + } + y[i] += alpha * tmp2 + kk += n - i + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + tmp1 := alpha * x[ix] + y[iy] += tmp1 * complex(real(ap[kk]), 0) + var tmp2 complex128 + jx := ix + jy := iy + for k := kk + 1; k < kk+n-i; k++ { + jx += incX + jy += incY + y[jy] += tmp1 * cmplx.Conj(ap[k]) + tmp2 += ap[k] * x[jx] + } + y[iy] += alpha * tmp2 + ix += incX + iy += incY + kk += n - i + } + } + return + } + + // Form y when ap contains the lower triangle. + // Here, kk points to the beginning of current row in ap. + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + tmp1 := alpha * x[i] + var tmp2 complex128 + k := kk + for j := 0; j < i; j++ { + y[j] += tmp1 * cmplx.Conj(ap[k]) + tmp2 += ap[k] * x[j] + k++ + } + aii := complex(real(ap[kk+i]), 0) + y[i] += tmp1*aii + alpha*tmp2 + kk += i + 1 + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + tmp1 := alpha * x[ix] + var tmp2 complex128 + jx := kx + jy := ky + for k := kk; k < kk+i; k++ { + y[jy] += tmp1 * cmplx.Conj(ap[k]) + tmp2 += ap[k] * x[jx] + jx += incX + jy += incY + } + aii := complex(real(ap[kk+i]), 0) + y[iy] += tmp1*aii + alpha*tmp2 + ix += incX + iy += incY + kk += i + 1 + } + } +} + +// Zhpr performs the Hermitian rank-1 operation +// +// A += alpha * x * xᴴ +// +// where alpha is a real scalar, x is a vector, and A is an n×n hermitian matrix +// in packed form. On entry, the imaginary parts of the diagonal elements are +// assumed to be zero, and on return they are set to zero. +func (Implementation) Zhpr(uplo blas.Uplo, n int, alpha float64, x []complex128, incX int, ap []complex128) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + // Set up start index in X. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + // The elements of A are accessed sequentially with one pass through ap. + + var kk int + if uplo == blas.Upper { + // Form A when upper triangle is stored in AP. + // Here, kk points to the current diagonal element in ap. 
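+	// NOTE (editorial sketch; not part of the upstream gonum sources):
+	// with the upper triangle packed row-major, a 3×3 matrix is stored as
+	//
+	//	ap = [a00 a01 a02 a11 a12 a22]
+	//
+	// so row i holds n-i entries and the kk += n - i steps below move the
+	// pointer from one diagonal element to the next.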
+ if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + if xi != 0 { + aii := real(ap[kk]) + alpha*real(cmplx.Conj(xi)*xi) + ap[kk] = complex(aii, 0) + + tmp := complex(alpha, 0) * xi + a := ap[kk+1 : kk+n-i] + x := x[i+1 : n] + for j, v := range x { + a[j] += tmp * cmplx.Conj(v) + } + } else { + ap[kk] = complex(real(ap[kk]), 0) + } + kk += n - i + } + } else { + ix := kx + for i := 0; i < n; i++ { + xi := x[ix] + if xi != 0 { + aii := real(ap[kk]) + alpha*real(cmplx.Conj(xi)*xi) + ap[kk] = complex(aii, 0) + + tmp := complex(alpha, 0) * xi + jx := ix + incX + a := ap[kk+1 : kk+n-i] + for k := range a { + a[k] += tmp * cmplx.Conj(x[jx]) + jx += incX + } + } else { + ap[kk] = complex(real(ap[kk]), 0) + } + ix += incX + kk += n - i + } + } + return + } + + // Form A when lower triangle is stored in AP. + // Here, kk points to the beginning of current row in ap. + if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + if xi != 0 { + tmp := complex(alpha, 0) * xi + a := ap[kk : kk+i] + for j, v := range x[:i] { + a[j] += tmp * cmplx.Conj(v) + } + + aii := real(ap[kk+i]) + alpha*real(cmplx.Conj(xi)*xi) + ap[kk+i] = complex(aii, 0) + } else { + ap[kk+i] = complex(real(ap[kk+i]), 0) + } + kk += i + 1 + } + } else { + ix := kx + for i := 0; i < n; i++ { + xi := x[ix] + if xi != 0 { + tmp := complex(alpha, 0) * xi + a := ap[kk : kk+i] + jx := kx + for k := range a { + a[k] += tmp * cmplx.Conj(x[jx]) + jx += incX + } + + aii := real(ap[kk+i]) + alpha*real(cmplx.Conj(xi)*xi) + ap[kk+i] = complex(aii, 0) + } else { + ap[kk+i] = complex(real(ap[kk+i]), 0) + } + ix += incX + kk += i + 1 + } + } +} + +// Zhpr2 performs the Hermitian rank-2 operation +// +// A += alpha * x * yᴴ + conj(alpha) * y * xᴴ +// +// where alpha is a complex scalar, x and y are n element vectors, and A is an +// n×n Hermitian matrix, supplied in packed form. On entry, the imaginary parts +// of the diagonal elements are assumed to be zero, and on return they are set to zero. +func (Implementation) Zhpr2(uplo blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, ap []complex128) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + // Set up start indices in X and Y. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + var ky int + if incY < 0 { + ky = (1 - n) * incY + } + + // The elements of A are accessed sequentially with one pass through ap. + + var kk int + if uplo == blas.Upper { + // Form A when upper triangle is stored in AP. + // Here, kk points to the current diagonal element in ap. 
+ if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 || y[i] != 0 { + tmp1 := alpha * x[i] + tmp2 := cmplx.Conj(alpha) * y[i] + aii := real(ap[kk]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i])) + ap[kk] = complex(aii, 0) + k := kk + 1 + for j := i + 1; j < n; j++ { + ap[k] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j]) + k++ + } + } else { + ap[kk] = complex(real(ap[kk]), 0) + } + kk += n - i + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + if x[ix] != 0 || y[iy] != 0 { + tmp1 := alpha * x[ix] + tmp2 := cmplx.Conj(alpha) * y[iy] + aii := real(ap[kk]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix])) + ap[kk] = complex(aii, 0) + jx := ix + incX + jy := iy + incY + for k := kk + 1; k < kk+n-i; k++ { + ap[k] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx]) + jx += incX + jy += incY + } + } else { + ap[kk] = complex(real(ap[kk]), 0) + } + ix += incX + iy += incY + kk += n - i + } + } + return + } + + // Form A when lower triangle is stored in AP. + // Here, kk points to the beginning of current row in ap. + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 || y[i] != 0 { + tmp1 := alpha * x[i] + tmp2 := cmplx.Conj(alpha) * y[i] + k := kk + for j := 0; j < i; j++ { + ap[k] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j]) + k++ + } + aii := real(ap[kk+i]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i])) + ap[kk+i] = complex(aii, 0) + } else { + ap[kk+i] = complex(real(ap[kk+i]), 0) + } + kk += i + 1 + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + if x[ix] != 0 || y[iy] != 0 { + tmp1 := alpha * x[ix] + tmp2 := cmplx.Conj(alpha) * y[iy] + jx := kx + jy := ky + for k := kk; k < kk+i; k++ { + ap[k] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx]) + jx += incX + jy += incY + } + aii := real(ap[kk+i]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix])) + ap[kk+i] = complex(aii, 0) + } else { + ap[kk+i] = complex(real(ap[kk+i]), 0) + } + ix += incX + iy += incY + kk += i + 1 + } + } +} + +// Ztbmv performs one of the matrix-vector operations +// +// x = A * x if trans = blas.NoTrans +// x = Aᵀ * x if trans = blas.Trans +// x = Aᴴ * x if trans = blas.ConjTrans +// +// where x is an n element vector and A is an n×n triangular band matrix, with +// (k+1) diagonals. +func (Implementation) Ztbmv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. 
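+	// NOTE (editorial sketch; not part of the upstream gonum sources): for
+	// a negative increment the BLAS convention is to traverse x backwards
+	// from its far end, so the first logical element sits at offset
+	// (1-n)*incX, a non-negative index because incX < 0; that is what kx
+	// computes below.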
+ var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + switch trans { + case blas.NoTrans: + if uplo == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + if diag == blas.NonUnit { + xi *= a[i*lda] + } + kk := min(k, n-i-1) + for j, aij := range a[i*lda+1 : i*lda+kk+1] { + xi += x[i+j+1] * aij + } + x[i] = xi + } + } else { + ix := kx + for i := 0; i < n; i++ { + xi := x[ix] + if diag == blas.NonUnit { + xi *= a[i*lda] + } + kk := min(k, n-i-1) + jx := ix + incX + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + xi += x[jx] * aij + jx += incX + } + x[ix] = xi + ix += incX + } + } + } else { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + xi := x[i] + if diag == blas.NonUnit { + xi *= a[i*lda+k] + } + kk := min(k, i) + for j, aij := range a[i*lda+k-kk : i*lda+k] { + xi += x[i-kk+j] * aij + } + x[i] = xi + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + xi := x[ix] + if diag == blas.NonUnit { + xi *= a[i*lda+k] + } + kk := min(k, i) + jx := ix - kk*incX + for _, aij := range a[i*lda+k-kk : i*lda+k] { + xi += x[jx] * aij + jx += incX + } + x[ix] = xi + ix -= incX + } + } + } + case blas.Trans: + if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + xi := x[i] + for j, aij := range a[i*lda+1 : i*lda+kk+1] { + x[i+j+1] += xi * aij + } + if diag == blas.NonUnit { + x[i] *= a[i*lda] + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + jx := ix + incX + xi := x[ix] + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + x[jx] += xi * aij + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= a[i*lda] + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + kk := min(k, i) + xi := x[i] + for j, aij := range a[i*lda+k-kk : i*lda+k] { + x[i-kk+j] += xi * aij + } + if diag == blas.NonUnit { + x[i] *= a[i*lda+k] + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + kk := min(k, i) + jx := ix - kk*incX + xi := x[ix] + for _, aij := range a[i*lda+k-kk : i*lda+k] { + x[jx] += xi * aij + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= a[i*lda+k] + } + ix += incX + } + } + } + case blas.ConjTrans: + if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + xi := x[i] + for j, aij := range a[i*lda+1 : i*lda+kk+1] { + x[i+j+1] += xi * cmplx.Conj(aij) + } + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(a[i*lda]) + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + jx := ix + incX + xi := x[ix] + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + x[jx] += xi * cmplx.Conj(aij) + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(a[i*lda]) + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + kk := min(k, i) + xi := x[i] + for j, aij := range a[i*lda+k-kk : i*lda+k] { + x[i-kk+j] += xi * cmplx.Conj(aij) + } + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(a[i*lda+k]) + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + kk := min(k, i) + jx := ix - kk*incX + xi := x[ix] + for _, aij := range a[i*lda+k-kk : i*lda+k] { + x[jx] += xi * cmplx.Conj(aij) + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(a[i*lda+k]) + } + ix += incX + } + } + } + } +} + +// Ztbsv solves one of the systems of equations +// +// A * x = b if trans == blas.NoTrans +// Aᵀ * x = b if trans == blas.Trans +// Aᴴ * x = b if trans == blas.ConjTrans +// +// where b and x are n element vectors and A is an n×n triangular band matrix +// 
with (k+1) diagonals. +// +// On entry, x contains the values of b, and the solution is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Ztbsv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + switch trans { + case blas.NoTrans: + if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + var sum complex128 + for j, aij := range a[i*lda+1 : i*lda+kk+1] { + sum += x[i+1+j] * aij + } + x[i] -= sum + if diag == blas.NonUnit { + x[i] /= a[i*lda] + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + var sum complex128 + jx := ix + incX + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + sum += x[jx] * aij + jx += incX + } + x[ix] -= sum + if diag == blas.NonUnit { + x[ix] /= a[i*lda] + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + kk := min(k, i) + var sum complex128 + for j, aij := range a[i*lda+k-kk : i*lda+k] { + sum += x[i-kk+j] * aij + } + x[i] -= sum + if diag == blas.NonUnit { + x[i] /= a[i*lda+k] + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + kk := min(k, i) + var sum complex128 + jx := ix - kk*incX + for _, aij := range a[i*lda+k-kk : i*lda+k] { + sum += x[jx] * aij + jx += incX + } + x[ix] -= sum + if diag == blas.NonUnit { + x[ix] /= a[i*lda+k] + } + ix += incX + } + } + } + case blas.Trans: + if uplo == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[i] /= a[i*lda] + } + kk := min(k, n-i-1) + xi := x[i] + for j, aij := range a[i*lda+1 : i*lda+kk+1] { + x[i+1+j] -= xi * aij + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[ix] /= a[i*lda] + } + kk := min(k, n-i-1) + xi := x[ix] + jx := ix + incX + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + x[jx] -= xi * aij + jx += incX + } + ix += incX + } + } + } else { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[i] /= a[i*lda+k] + } + kk := min(k, i) + xi := x[i] + for j, aij := range a[i*lda+k-kk : i*lda+k] { + x[i-kk+j] -= xi * aij + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[ix] /= a[i*lda+k] + } + kk := min(k, i) + xi := x[ix] + jx := ix - kk*incX + for _, aij := range a[i*lda+k-kk : i*lda+k] { + x[jx] -= xi * aij + jx += incX + } + ix -= incX + } + } + } + case blas.ConjTrans: + if uplo == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[i] /= cmplx.Conj(a[i*lda]) + } + kk := min(k, n-i-1) + xi := x[i] + for j, aij := 
range a[i*lda+1 : i*lda+kk+1] { + x[i+1+j] -= xi * cmplx.Conj(aij) + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[ix] /= cmplx.Conj(a[i*lda]) + } + kk := min(k, n-i-1) + xi := x[ix] + jx := ix + incX + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + x[jx] -= xi * cmplx.Conj(aij) + jx += incX + } + ix += incX + } + } + } else { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[i] /= cmplx.Conj(a[i*lda+k]) + } + kk := min(k, i) + xi := x[i] + for j, aij := range a[i*lda+k-kk : i*lda+k] { + x[i-kk+j] -= xi * cmplx.Conj(aij) + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[ix] /= cmplx.Conj(a[i*lda+k]) + } + kk := min(k, i) + xi := x[ix] + jx := ix - kk*incX + for _, aij := range a[i*lda+k-kk : i*lda+k] { + x[jx] -= xi * cmplx.Conj(aij) + jx += incX + } + ix -= incX + } + } + } + } +} + +// Ztpmv performs one of the matrix-vector operations +// +// x = A * x if trans = blas.NoTrans +// x = Aᵀ * x if trans = blas.Trans +// x = Aᴴ * x if trans = blas.ConjTrans +// +// where x is an n element vector and A is an n×n triangular matrix, supplied in +// packed form. +func (Implementation) Ztpmv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n int, ap []complex128, x []complex128, incX int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + // The elements of A are accessed sequentially with one pass through A. + + if trans == blas.NoTrans { + // Form x = A*x. + if uplo == blas.Upper { + // kk points to the current diagonal element in ap. + kk := 0 + if incX == 1 { + x = x[:n] + for i := range x { + if diag == blas.NonUnit { + x[i] *= ap[kk] + } + if n-i-1 > 0 { + x[i] += c128.DotuUnitary(ap[kk+1:kk+n-i], x[i+1:]) + } + kk += n - i + } + } else { + ix := kx + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[ix] *= ap[kk] + } + if n-i-1 > 0 { + x[ix] += c128.DotuInc(ap[kk+1:kk+n-i], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX)) + } + ix += incX + kk += n - i + } + } + } else { + // kk points to the beginning of current row in ap. + kk := n*(n+1)/2 - n + if incX == 1 { + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[i] *= ap[kk+i] + } + if i > 0 { + x[i] += c128.DotuUnitary(ap[kk:kk+i], x[:i]) + } + kk -= i + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[ix] *= ap[kk+i] + } + if i > 0 { + x[ix] += c128.DotuInc(ap[kk:kk+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) + } + ix -= incX + kk -= i + } + } + } + return + } + + if trans == blas.Trans { + // Form x = Aᵀ*x. + if uplo == blas.Upper { + // kk points to the current diagonal element in ap. 
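+ // The transposed upper-triangle sweep visits rows bottom-up, so kk starts + // at the last diagonal element of ap, at index n*(n+1)/2 - 1.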
+ kk := n*(n+1)/2 - 1 + if incX == 1 { + for i := n - 1; i >= 0; i-- { + xi := x[i] + if diag == blas.NonUnit { + x[i] *= ap[kk] + } + if n-i-1 > 0 { + c128.AxpyUnitary(xi, ap[kk+1:kk+n-i], x[i+1:n]) + } + kk -= n - i + 1 + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + xi := x[ix] + if diag == blas.NonUnit { + x[ix] *= ap[kk] + } + if n-i-1 > 0 { + c128.AxpyInc(xi, ap[kk+1:kk+n-i], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX)) + } + ix -= incX + kk -= n - i + 1 + } + } + } else { + // kk points to the beginning of current row in ap. + kk := 0 + if incX == 1 { + x = x[:n] + for i := range x { + if i > 0 { + c128.AxpyUnitary(x[i], ap[kk:kk+i], x[:i]) + } + if diag == blas.NonUnit { + x[i] *= ap[kk+i] + } + kk += i + 1 + } + } else { + ix := kx + for i := 0; i < n; i++ { + if i > 0 { + c128.AxpyInc(x[ix], ap[kk:kk+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) + } + if diag == blas.NonUnit { + x[ix] *= ap[kk+i] + } + ix += incX + kk += i + 1 + } + } + } + return + } + + // Form x = Aᴴ*x. + if uplo == blas.Upper { + // kk points to the current diagonal element in ap. + kk := n*(n+1)/2 - 1 + if incX == 1 { + for i := n - 1; i >= 0; i-- { + xi := x[i] + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(ap[kk]) + } + k := kk + 1 + for j := i + 1; j < n; j++ { + x[j] += xi * cmplx.Conj(ap[k]) + k++ + } + kk -= n - i + 1 + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + xi := x[ix] + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(ap[kk]) + } + jx := ix + incX + k := kk + 1 + for j := i + 1; j < n; j++ { + x[jx] += xi * cmplx.Conj(ap[k]) + jx += incX + k++ + } + ix -= incX + kk -= n - i + 1 + } + } + } else { + // kk points to the beginning of current row in ap. + kk := 0 + if incX == 1 { + x = x[:n] + for i, xi := range x { + for j := 0; j < i; j++ { + x[j] += xi * cmplx.Conj(ap[kk+j]) + } + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(ap[kk+i]) + } + kk += i + 1 + } + } else { + ix := kx + for i := 0; i < n; i++ { + xi := x[ix] + jx := kx + for j := 0; j < i; j++ { + x[jx] += xi * cmplx.Conj(ap[kk+j]) + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(ap[kk+i]) + } + ix += incX + kk += i + 1 + } + } + } +} + +// Ztpsv solves one of the systems of equations +// +// A * x = b if trans == blas.NoTrans +// Aᵀ * x = b if trans == blas.Trans +// Aᴴ * x = b if trans == blas.ConjTrans +// +// where b and x are n element vectors and A is an n×n triangular matrix in +// packed form. +// +// On entry, x contains the values of b, and the solution is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Ztpsv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n int, ap []complex128, x []complex128, incX int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. 
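+ // With a negative increment the vector is traversed from its far end, + // so the first logical element x_0 sits at offset (1-n)*incX.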
+ var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + // The elements of A are accessed sequentially with one pass through ap. + + if trans == blas.NoTrans { + // Form x = inv(A)*x. + if uplo == blas.Upper { + kk := n*(n+1)/2 - 1 + if incX == 1 { + for i := n - 1; i >= 0; i-- { + aii := ap[kk] + if n-i-1 > 0 { + x[i] -= c128.DotuUnitary(x[i+1:n], ap[kk+1:kk+n-i]) + } + if diag == blas.NonUnit { + x[i] /= aii + } + kk -= n - i + 1 + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + aii := ap[kk] + if n-i-1 > 0 { + x[ix] -= c128.DotuInc(x, ap[kk+1:kk+n-i], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) + } + if diag == blas.NonUnit { + x[ix] /= aii + } + ix -= incX + kk -= n - i + 1 + } + } + } else { + kk := 0 + if incX == 1 { + for i := 0; i < n; i++ { + if i > 0 { + x[i] -= c128.DotuUnitary(x[:i], ap[kk:kk+i]) + } + if diag == blas.NonUnit { + x[i] /= ap[kk+i] + } + kk += i + 1 + } + } else { + ix := kx + for i := 0; i < n; i++ { + if i > 0 { + x[ix] -= c128.DotuInc(x, ap[kk:kk+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) + } + if diag == blas.NonUnit { + x[ix] /= ap[kk+i] + } + ix += incX + kk += i + 1 + } + } + } + return + } + + if trans == blas.Trans { + // Form x = inv(Aᵀ)*x. + if uplo == blas.Upper { + kk := 0 + if incX == 1 { + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[j] /= ap[kk] + } + if n-j-1 > 0 { + c128.AxpyUnitary(-x[j], ap[kk+1:kk+n-j], x[j+1:n]) + } + kk += n - j + } + } else { + jx := kx + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[jx] /= ap[kk] + } + if n-j-1 > 0 { + c128.AxpyInc(-x[jx], ap[kk+1:kk+n-j], x, uintptr(n-j-1), 1, uintptr(incX), 0, uintptr(jx+incX)) + } + jx += incX + kk += n - j + } + } + } else { + kk := n*(n+1)/2 - n + if incX == 1 { + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[j] /= ap[kk+j] + } + if j > 0 { + c128.AxpyUnitary(-x[j], ap[kk:kk+j], x[:j]) + } + kk -= j + } + } else { + jx := kx + (n-1)*incX + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[jx] /= ap[kk+j] + } + if j > 0 { + c128.AxpyInc(-x[jx], ap[kk:kk+j], x, uintptr(j), 1, uintptr(incX), 0, uintptr(kx)) + } + jx -= incX + kk -= j + } + } + } + return + } + + // Form x = inv(Aᴴ)*x. + if uplo == blas.Upper { + kk := 0 + if incX == 1 { + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[j] /= cmplx.Conj(ap[kk]) + } + xj := x[j] + k := kk + 1 + for i := j + 1; i < n; i++ { + x[i] -= xj * cmplx.Conj(ap[k]) + k++ + } + kk += n - j + } + } else { + jx := kx + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[jx] /= cmplx.Conj(ap[kk]) + } + xj := x[jx] + ix := jx + incX + k := kk + 1 + for i := j + 1; i < n; i++ { + x[ix] -= xj * cmplx.Conj(ap[k]) + ix += incX + k++ + } + jx += incX + kk += n - j + } + } + } else { + kk := n*(n+1)/2 - n + if incX == 1 { + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[j] /= cmplx.Conj(ap[kk+j]) + } + xj := x[j] + for i := 0; i < j; i++ { + x[i] -= xj * cmplx.Conj(ap[kk+i]) + } + kk -= j + } + } else { + jx := kx + (n-1)*incX + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[jx] /= cmplx.Conj(ap[kk+j]) + } + xj := x[jx] + ix := kx + for i := 0; i < j; i++ { + x[ix] -= xj * cmplx.Conj(ap[kk+i]) + ix += incX + } + jx -= incX + kk -= j + } + } + } +} + +// Ztrmv performs one of the matrix-vector operations +// +// x = A * x if trans = blas.NoTrans +// x = Aᵀ * x if trans = blas.Trans +// x = Aᴴ * x if trans = blas.ConjTrans +// +// where x is a vector, and A is an n×n triangular matrix. 
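+// +// A minimal usage sketch (editorial addition, not part of the generated +// source), multiplying x in place by the 2×2 upper triangular matrix +// A = [[1, 2], [0, 3]]: +// +// x := []complex128{1, 1} +// Implementation{}.Ztrmv(blas.Upper, blas.NoTrans, blas.NonUnit, 2, []complex128{1, 2, 0, 3}, 2, x, 1) +// // x is now [1*1+2*1, 3*1] = [3, 3]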
+func (Implementation) Ztrmv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n int, a []complex128, lda int, x []complex128, incX int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + // The elements of A are accessed sequentially with one pass through A. + + if trans == blas.NoTrans { + // Form x = A*x. + if uplo == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[i] *= a[i*lda+i] + } + if n-i-1 > 0 { + x[i] += c128.DotuUnitary(a[i*lda+i+1:i*lda+n], x[i+1:n]) + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[ix] *= a[i*lda+i] + } + if n-i-1 > 0 { + x[ix] += c128.DotuInc(a[i*lda+i+1:i*lda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX)) + } + ix += incX + } + } + } else { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[i] *= a[i*lda+i] + } + if i > 0 { + x[i] += c128.DotuUnitary(a[i*lda:i*lda+i], x[:i]) + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[ix] *= a[i*lda+i] + } + if i > 0 { + x[ix] += c128.DotuInc(a[i*lda:i*lda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) + } + ix -= incX + } + } + } + return + } + + if trans == blas.Trans { + // Form x = Aᵀ*x. + if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + xi := x[i] + if diag == blas.NonUnit { + x[i] *= a[i*lda+i] + } + if n-i-1 > 0 { + c128.AxpyUnitary(xi, a[i*lda+i+1:i*lda+n], x[i+1:n]) + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + xi := x[ix] + if diag == blas.NonUnit { + x[ix] *= a[i*lda+i] + } + if n-i-1 > 0 { + c128.AxpyInc(xi, a[i*lda+i+1:i*lda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX)) + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + if i > 0 { + c128.AxpyUnitary(x[i], a[i*lda:i*lda+i], x[:i]) + } + if diag == blas.NonUnit { + x[i] *= a[i*lda+i] + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + if i > 0 { + c128.AxpyInc(x[ix], a[i*lda:i*lda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) + } + if diag == blas.NonUnit { + x[ix] *= a[i*lda+i] + } + ix += incX + } + } + } + return + } + + // Form x = Aᴴ*x. 
+ if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + xi := x[i] + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(a[i*lda+i]) + } + for j := i + 1; j < n; j++ { + x[j] += xi * cmplx.Conj(a[i*lda+j]) + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + xi := x[ix] + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(a[i*lda+i]) + } + jx := ix + incX + for j := i + 1; j < n; j++ { + x[jx] += xi * cmplx.Conj(a[i*lda+j]) + jx += incX + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + for j := 0; j < i; j++ { + x[j] += x[i] * cmplx.Conj(a[i*lda+j]) + } + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(a[i*lda+i]) + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + jx := kx + for j := 0; j < i; j++ { + x[jx] += x[ix] * cmplx.Conj(a[i*lda+j]) + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(a[i*lda+i]) + } + ix += incX + } + } + } +} + +// Ztrsv solves one of the systems of equations +// +// A * x = b if trans == blas.NoTrans +// Aᵀ * x = b if trans == blas.Trans +// Aᴴ * x = b if trans == blas.ConjTrans +// +// where b and x are n element vectors and A is an n×n triangular matrix. +// +// On entry, x contains the values of b, and the solution is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Ztrsv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n int, a []complex128, lda int, x []complex128, incX int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + // The elements of A are accessed sequentially with one pass through A. + + if trans == blas.NoTrans { + // Form x = inv(A)*x. + if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + aii := a[i*lda+i] + if n-i-1 > 0 { + x[i] -= c128.DotuUnitary(x[i+1:n], a[i*lda+i+1:i*lda+n]) + } + if diag == blas.NonUnit { + x[i] /= aii + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + aii := a[i*lda+i] + if n-i-1 > 0 { + x[ix] -= c128.DotuInc(x, a[i*lda+i+1:i*lda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) + } + if diag == blas.NonUnit { + x[ix] /= aii + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + if i > 0 { + x[i] -= c128.DotuUnitary(x[:i], a[i*lda:i*lda+i]) + } + if diag == blas.NonUnit { + x[i] /= a[i*lda+i] + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + if i > 0 { + x[ix] -= c128.DotuInc(x, a[i*lda:i*lda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) + } + if diag == blas.NonUnit { + x[ix] /= a[i*lda+i] + } + ix += incX + } + } + } + return + } + + if trans == blas.Trans { + // Form x = inv(Aᵀ)*x. 
+ if uplo == blas.Upper { + if incX == 1 { + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[j] /= a[j*lda+j] + } + if n-j-1 > 0 { + c128.AxpyUnitary(-x[j], a[j*lda+j+1:j*lda+n], x[j+1:n]) + } + } + } else { + jx := kx + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[jx] /= a[j*lda+j] + } + if n-j-1 > 0 { + c128.AxpyInc(-x[jx], a[j*lda+j+1:j*lda+n], x, uintptr(n-j-1), 1, uintptr(incX), 0, uintptr(jx+incX)) + } + jx += incX + } + } + } else { + if incX == 1 { + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[j] /= a[j*lda+j] + } + xj := x[j] + if j > 0 { + c128.AxpyUnitary(-xj, a[j*lda:j*lda+j], x[:j]) + } + } + } else { + jx := kx + (n-1)*incX + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[jx] /= a[j*lda+j] + } + if j > 0 { + c128.AxpyInc(-x[jx], a[j*lda:j*lda+j], x, uintptr(j), 1, uintptr(incX), 0, uintptr(kx)) + } + jx -= incX + } + } + } + return + } + + // Form x = inv(Aᴴ)*x. + if uplo == blas.Upper { + if incX == 1 { + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[j] /= cmplx.Conj(a[j*lda+j]) + } + xj := x[j] + for i := j + 1; i < n; i++ { + x[i] -= xj * cmplx.Conj(a[j*lda+i]) + } + } + } else { + jx := kx + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[jx] /= cmplx.Conj(a[j*lda+j]) + } + xj := x[jx] + ix := jx + incX + for i := j + 1; i < n; i++ { + x[ix] -= xj * cmplx.Conj(a[j*lda+i]) + ix += incX + } + jx += incX + } + } + } else { + if incX == 1 { + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[j] /= cmplx.Conj(a[j*lda+j]) + } + xj := x[j] + for i := 0; i < j; i++ { + x[i] -= xj * cmplx.Conj(a[j*lda+i]) + } + } + } else { + jx := kx + (n-1)*incX + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[jx] /= cmplx.Conj(a[j*lda+j]) + } + xj := x[jx] + ix := kx + for i := 0; i < j; i++ { + x[ix] -= xj * cmplx.Conj(a[j*lda+i]) + ix += incX + } + jx -= incX + } + } + } +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx64.go b/vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx64.go new file mode 100644 index 0000000000..3ce67868cd --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx64.go @@ -0,0 +1,2976 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT. + +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + cmplx "gonum.org/v1/gonum/internal/cmplx64" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/c64" +) + +var _ blas.Complex64Level2 = Implementation{} + +// Cgbmv performs one of the matrix-vector operations +// +// y = alpha * A * x + beta * y if trans = blas.NoTrans +// y = alpha * Aᵀ * x + beta * y if trans = blas.Trans +// y = alpha * Aᴴ * x + beta * y if trans = blas.ConjTrans +// +// where alpha and beta are scalars, x and y are vectors, and A is an m×n band matrix +// with kL sub-diagonals and kU super-diagonals. +// +// Complex64 implementations are autogenerated and not directly tested. 
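+// +// Editorial note on the compact band layout this routine assumes: element +// A(i,j) of the band is stored at a[i*lda+j-i+kL], so each row of a holds +// the band entries of the corresponding matrix row, with the first kL rows +// left-padded by unused slots.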
+func (Implementation) Cgbmv(trans blas.Transpose, m, n, kL, kU int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if kL < 0 { + panic(kLLT0) + } + if kU < 0 { + panic(kULT0) + } + if lda < kL+kU+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 { + panic(shortA) + } + var lenX, lenY int + if trans == blas.NoTrans { + lenX, lenY = n, m + } else { + lenX, lenY = m, n + } + if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + var kx int + if incX < 0 { + kx = (1 - lenX) * incX + } + var ky int + if incY < 0 { + ky = (1 - lenY) * incY + } + + // Form y = beta*y. + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:lenY] { + y[i] = 0 + } + } else { + c64.ScalUnitary(beta, y[:lenY]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < lenY; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + c64.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) + } else { + c64.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) + } + } + } + } + + nRow := min(m, n+kL) + nCol := kL + 1 + kU + switch trans { + case blas.NoTrans: + iy := ky + if incX == 1 { + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) + xtmp := x[off : off+u-l] + var sum complex64 + for j, v := range aRow { + sum += xtmp[j] * v + } + y[iy] += alpha * sum + iy += incY + } + } else { + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) * incX + jx := kx + var sum complex64 + for _, v := range aRow { + sum += x[off+jx] * v + jx += incX + } + y[iy] += alpha * sum + iy += incY + } + } + case blas.Trans: + if incX == 1 { + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) * incY + alphaxi := alpha * x[i] + jy := ky + for _, v := range aRow { + y[off+jy] += alphaxi * v + jy += incY + } + } + } else { + ix := kx + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) * incY + alphaxi := alpha * x[ix] + jy := ky + for _, v := range aRow { + y[off+jy] += alphaxi * v + jy += incY + } + ix += incX + } + } + case blas.ConjTrans: + if incX == 1 { + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) * incY + alphaxi := alpha * x[i] + jy := ky + for _, v := range aRow { + y[off+jy] += alphaxi * cmplx.Conj(v) + jy += incY + } + } + } else { + ix := kx + for i := 0; i < nRow; i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + aRow := a[i*lda+l : i*lda+u] + off := max(0, i-kL) * incY + alphaxi := alpha * x[ix] + jy := ky + for _, v := range aRow { + y[off+jy] += alphaxi * cmplx.Conj(v) + jy += incY + } + ix += incX + } + } + } +} + +// Cgemv performs 
one of the matrix-vector operations +// +// y = alpha * A * x + beta * y if trans = blas.NoTrans +// y = alpha * Aᵀ * x + beta * y if trans = blas.Trans +// y = alpha * Aᴴ * x + beta * y if trans = blas.ConjTrans +// +// where alpha and beta are scalars, x and y are vectors, and A is an m×n dense matrix. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Cgemv(trans blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + var lenX, lenY int + if trans == blas.NoTrans { + lenX = n + lenY = m + } else { + lenX = m + lenY = n + } + if len(a) < lda*(m-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + var kx int + if incX < 0 { + kx = (1 - lenX) * incX + } + var ky int + if incY < 0 { + ky = (1 - lenY) * incY + } + + // Form y = beta*y. + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:lenY] { + y[i] = 0 + } + } else { + c64.ScalUnitary(beta, y[:lenY]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < lenY; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + c64.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) + } else { + c64.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) + } + } + } + } + + if alpha == 0 { + return + } + + switch trans { + default: + // Form y = alpha*A*x + y. + iy := ky + if incX == 1 { + for i := 0; i < m; i++ { + y[iy] += alpha * c64.DotuUnitary(a[i*lda:i*lda+n], x[:n]) + iy += incY + } + return + } + for i := 0; i < m; i++ { + y[iy] += alpha * c64.DotuInc(a[i*lda:i*lda+n], x, uintptr(n), 1, uintptr(incX), 0, uintptr(kx)) + iy += incY + } + return + + case blas.Trans: + // Form y = alpha*Aᵀ*x + y. + ix := kx + if incY == 1 { + for i := 0; i < m; i++ { + c64.AxpyUnitary(alpha*x[ix], a[i*lda:i*lda+n], y[:n]) + ix += incX + } + return + } + for i := 0; i < m; i++ { + c64.AxpyInc(alpha*x[ix], a[i*lda:i*lda+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky)) + ix += incX + } + return + + case blas.ConjTrans: + // Form y = alpha*Aᴴ*x + y. + ix := kx + if incY == 1 { + for i := 0; i < m; i++ { + tmp := alpha * x[ix] + for j := 0; j < n; j++ { + y[j] += tmp * cmplx.Conj(a[i*lda+j]) + } + ix += incX + } + return + } + for i := 0; i < m; i++ { + tmp := alpha * x[ix] + jy := ky + for j := 0; j < n; j++ { + y[jy] += tmp * cmplx.Conj(a[i*lda+j]) + jy += incY + } + ix += incX + } + return + } +} + +// Cgerc performs the rank-one operation +// +// A += alpha * x * yᴴ +// +// where A is an m×n dense matrix, alpha is a scalar, x is an m element vector, +// and y is an n element vector. +// +// Complex64 implementations are autogenerated and not directly tested. 
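+// +// A minimal sketch (editorial addition): with m = n = 1 and alpha = 1, Cgerc +// adds x[0]*conj(y[0]) to a[0], e.g. 1i*conj(1i) = 1: +// +// a := []complex64{0} +// Implementation{}.Cgerc(1, 1, 1, []complex64{1i}, 1, []complex64{1i}, 1, a, 1) +// // a[0] == 1+0i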
+func (Implementation) Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) { + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(a) < lda*(m-1)+n { + panic(shortA) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + var kx, jy int + if incX < 0 { + kx = (1 - m) * incX + } + if incY < 0 { + jy = (1 - n) * incY + } + for j := 0; j < n; j++ { + if y[jy] != 0 { + tmp := alpha * cmplx.Conj(y[jy]) + c64.AxpyInc(tmp, x, a[j:], uintptr(m), uintptr(incX), uintptr(lda), uintptr(kx), 0) + } + jy += incY + } +} + +// Cgeru performs the rank-one operation +// +// A += alpha * x * yᵀ +// +// where A is an m×n dense matrix, alpha is a scalar, x is an m element vector, +// and y is an n element vector. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) { + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(a) < lda*(m-1)+n { + panic(shortA) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + var kx int + if incX < 0 { + kx = (1 - m) * incX + } + if incY == 1 { + for i := 0; i < m; i++ { + if x[kx] != 0 { + tmp := alpha * x[kx] + c64.AxpyUnitary(tmp, y[:n], a[i*lda:i*lda+n]) + } + kx += incX + } + return + } + var jy int + if incY < 0 { + jy = (1 - n) * incY + } + for i := 0; i < m; i++ { + if x[kx] != 0 { + tmp := alpha * x[kx] + c64.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(jy), 0) + } + kx += incX + } +} + +// Chbmv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where alpha and beta are scalars, x and y are vectors, and A is an n×n +// Hermitian band matrix with k super-diagonals. The imaginary parts of +// the diagonal elements of A are ignored and assumed to be zero. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Chbmv(uplo blas.Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
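+ // The last band row begins at (n-1)*lda and holds k+1 entries, giving + // the minimum length lda*(n-1)+k+1 demanded of a below.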
+ if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up the start indices in X and Y. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + var ky int + if incY < 0 { + ky = (1 - n) * incY + } + + // Form y = beta*y. + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + for i, v := range y[:n] { + y[i] = beta * v + } + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + for i := 0; i < n; i++ { + y[iy] = beta * y[iy] + iy += incY + } + } + } + } + + if alpha == 0 { + return + } + + // The elements of A are accessed sequentially with one pass through a. + switch uplo { + case blas.Upper: + iy := ky + if incX == 1 { + for i := 0; i < n; i++ { + aRow := a[i*lda:] + alphaxi := alpha * x[i] + sum := alphaxi * complex(real(aRow[0]), 0) + u := min(k+1, n-i) + jy := incY + for j := 1; j < u; j++ { + v := aRow[j] + sum += alpha * x[i+j] * v + y[iy+jy] += alphaxi * cmplx.Conj(v) + jy += incY + } + y[iy] += sum + iy += incY + } + } else { + ix := kx + for i := 0; i < n; i++ { + aRow := a[i*lda:] + alphaxi := alpha * x[ix] + sum := alphaxi * complex(real(aRow[0]), 0) + u := min(k+1, n-i) + jx := incX + jy := incY + for j := 1; j < u; j++ { + v := aRow[j] + sum += alpha * x[ix+jx] * v + y[iy+jy] += alphaxi * cmplx.Conj(v) + jx += incX + jy += incY + } + y[iy] += sum + ix += incX + iy += incY + } + } + case blas.Lower: + iy := ky + if incX == 1 { + for i := 0; i < n; i++ { + l := max(0, k-i) + alphaxi := alpha * x[i] + jy := l * incY + aRow := a[i*lda:] + for j := l; j < k; j++ { + v := aRow[j] + y[iy] += alpha * v * x[i-k+j] + y[iy-k*incY+jy] += alphaxi * cmplx.Conj(v) + jy += incY + } + y[iy] += alphaxi * complex(real(aRow[k]), 0) + iy += incY + } + } else { + ix := kx + for i := 0; i < n; i++ { + l := max(0, k-i) + alphaxi := alpha * x[ix] + jx := l * incX + jy := l * incY + aRow := a[i*lda:] + for j := l; j < k; j++ { + v := aRow[j] + y[iy] += alpha * v * x[ix-k*incX+jx] + y[iy-k*incY+jy] += alphaxi * cmplx.Conj(v) + jx += incX + jy += incY + } + y[iy] += alphaxi * complex(real(aRow[k]), 0) + ix += incX + iy += incY + } + } + } +} + +// Chemv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where alpha and beta are scalars, x and y are vectors, and A is an n×n +// Hermitian matrix. The imaginary parts of the diagonal elements of A are +// ignored and assumed to be zero. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Chemv(uplo blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
+ if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up the start indices in X and Y. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + var ky int + if incY < 0 { + ky = (1 - n) * incY + } + + // Form y = beta*y. + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + for i, v := range y[:n] { + y[i] = beta * v + } + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + for i := 0; i < n; i++ { + y[iy] = beta * y[iy] + iy += incY + } + } + } + } + + if alpha == 0 { + return + } + + // The elements of A are accessed sequentially with one pass through + // the triangular part of A. + + if uplo == blas.Upper { + // Form y when A is stored in upper triangle. + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + tmp1 := alpha * x[i] + var tmp2 complex64 + for j := i + 1; j < n; j++ { + y[j] += tmp1 * cmplx.Conj(a[i*lda+j]) + tmp2 += a[i*lda+j] * x[j] + } + aii := complex(real(a[i*lda+i]), 0) + y[i] += tmp1*aii + alpha*tmp2 + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + tmp1 := alpha * x[ix] + var tmp2 complex64 + jx := ix + jy := iy + for j := i + 1; j < n; j++ { + jx += incX + jy += incY + y[jy] += tmp1 * cmplx.Conj(a[i*lda+j]) + tmp2 += a[i*lda+j] * x[jx] + } + aii := complex(real(a[i*lda+i]), 0) + y[iy] += tmp1*aii + alpha*tmp2 + ix += incX + iy += incY + } + } + return + } + + // Form y when A is stored in lower triangle. + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + tmp1 := alpha * x[i] + var tmp2 complex64 + for j := 0; j < i; j++ { + y[j] += tmp1 * cmplx.Conj(a[i*lda+j]) + tmp2 += a[i*lda+j] * x[j] + } + aii := complex(real(a[i*lda+i]), 0) + y[i] += tmp1*aii + alpha*tmp2 + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + tmp1 := alpha * x[ix] + var tmp2 complex64 + jx := kx + jy := ky + for j := 0; j < i; j++ { + y[jy] += tmp1 * cmplx.Conj(a[i*lda+j]) + tmp2 += a[i*lda+j] * x[jx] + jx += incX + jy += incY + } + aii := complex(real(a[i*lda+i]), 0) + y[iy] += tmp1*aii + alpha*tmp2 + ix += incX + iy += incY + } + } +} + +// Cher performs the Hermitian rank-one operation +// +// A += alpha * x * xᴴ +// +// where A is an n×n Hermitian matrix, alpha is a real scalar, and x is an n +// element vector. On entry, the imaginary parts of the diagonal elements of A +// are ignored and assumed to be zero, on return they will be set to zero. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Cher(uplo blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if len(a) < lda*(n-1)+n { + panic(shortA) + } + + // Quick return if possible. 
+ if alpha == 0 { + return + } + + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + if uplo == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 { + tmp := complex(alpha*real(x[i]), alpha*imag(x[i])) + aii := real(a[i*lda+i]) + xtmp := real(tmp * cmplx.Conj(x[i])) + a[i*lda+i] = complex(aii+xtmp, 0) + for j := i + 1; j < n; j++ { + a[i*lda+j] += tmp * cmplx.Conj(x[j]) + } + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + } + return + } + + ix := kx + for i := 0; i < n; i++ { + if x[ix] != 0 { + tmp := complex(alpha*real(x[ix]), alpha*imag(x[ix])) + aii := real(a[i*lda+i]) + xtmp := real(tmp * cmplx.Conj(x[ix])) + a[i*lda+i] = complex(aii+xtmp, 0) + jx := ix + incX + for j := i + 1; j < n; j++ { + a[i*lda+j] += tmp * cmplx.Conj(x[jx]) + jx += incX + } + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + ix += incX + } + return + } + + if incX == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 { + tmp := complex(alpha*real(x[i]), alpha*imag(x[i])) + for j := 0; j < i; j++ { + a[i*lda+j] += tmp * cmplx.Conj(x[j]) + } + aii := real(a[i*lda+i]) + xtmp := real(tmp * cmplx.Conj(x[i])) + a[i*lda+i] = complex(aii+xtmp, 0) + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + } + return + } + + ix := kx + for i := 0; i < n; i++ { + if x[ix] != 0 { + tmp := complex(alpha*real(x[ix]), alpha*imag(x[ix])) + jx := kx + for j := 0; j < i; j++ { + a[i*lda+j] += tmp * cmplx.Conj(x[jx]) + jx += incX + } + aii := real(a[i*lda+i]) + xtmp := real(tmp * cmplx.Conj(x[ix])) + a[i*lda+i] = complex(aii+xtmp, 0) + + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + ix += incX + } +} + +// Cher2 performs the Hermitian rank-two operation +// +// A += alpha * x * yᴴ + conj(alpha) * y * xᴴ +// +// where alpha is a scalar, x and y are n element vectors and A is an n×n +// Hermitian matrix. On entry, the imaginary parts of the diagonal elements are +// ignored and assumed to be zero. On return they will be set to zero. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Cher2(uplo blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(a) < lda*(n-1)+n { + panic(shortA) + } + + // Quick return if possible. 
+ if alpha == 0 { + return + } + + var kx, ky int + var ix, iy int + if incX != 1 || incY != 1 { + if incX < 0 { + kx = (1 - n) * incX + } + if incY < 0 { + ky = (1 - n) * incY + } + ix = kx + iy = ky + } + if uplo == blas.Upper { + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 || y[i] != 0 { + tmp1 := alpha * x[i] + tmp2 := cmplx.Conj(alpha) * y[i] + aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i])) + a[i*lda+i] = complex(aii, 0) + for j := i + 1; j < n; j++ { + a[i*lda+j] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j]) + } + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + } + return + } + for i := 0; i < n; i++ { + if x[ix] != 0 || y[iy] != 0 { + tmp1 := alpha * x[ix] + tmp2 := cmplx.Conj(alpha) * y[iy] + aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix])) + a[i*lda+i] = complex(aii, 0) + jx := ix + incX + jy := iy + incY + for j := i + 1; j < n; j++ { + a[i*lda+j] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx]) + jx += incX + jy += incY + } + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + ix += incX + iy += incY + } + return + } + + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 || y[i] != 0 { + tmp1 := alpha * x[i] + tmp2 := cmplx.Conj(alpha) * y[i] + for j := 0; j < i; j++ { + a[i*lda+j] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j]) + } + aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i])) + a[i*lda+i] = complex(aii, 0) + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + } + return + } + for i := 0; i < n; i++ { + if x[ix] != 0 || y[iy] != 0 { + tmp1 := alpha * x[ix] + tmp2 := cmplx.Conj(alpha) * y[iy] + jx := kx + jy := ky + for j := 0; j < i; j++ { + a[i*lda+j] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx]) + jx += incX + jy += incY + } + aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix])) + a[i*lda+i] = complex(aii, 0) + } else { + aii := real(a[i*lda+i]) + a[i*lda+i] = complex(aii, 0) + } + ix += incX + iy += incY + } +} + +// Chpmv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where alpha and beta are scalars, x and y are vectors, and A is an n×n +// Hermitian matrix in packed form. The imaginary parts of the diagonal +// elements of A are ignored and assumed to be zero. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Chpmv(uplo blas.Uplo, n int, alpha complex64, ap []complex64, x []complex64, incX int, beta complex64, y []complex64, incY int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up the start indices in X and Y. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + var ky int + if incY < 0 { + ky = (1 - n) * incY + } + + // Form y = beta*y. 
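+ // Per BLAS convention, beta == 0 writes y without reading it, so NaN or + // Inf values already in y are cleared rather than propagated.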
+ if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + for i, v := range y[:n] { + y[i] = beta * v + } + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + for i := 0; i < n; i++ { + y[iy] *= beta + iy += incY + } + } + } + } + + if alpha == 0 { + return + } + + // The elements of A are accessed sequentially with one pass through ap. + + var kk int + if uplo == blas.Upper { + // Form y when ap contains the upper triangle. + // Here, kk points to the current diagonal element in ap. + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + tmp1 := alpha * x[i] + y[i] += tmp1 * complex(real(ap[kk]), 0) + var tmp2 complex64 + k := kk + 1 + for j := i + 1; j < n; j++ { + y[j] += tmp1 * cmplx.Conj(ap[k]) + tmp2 += ap[k] * x[j] + k++ + } + y[i] += alpha * tmp2 + kk += n - i + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + tmp1 := alpha * x[ix] + y[iy] += tmp1 * complex(real(ap[kk]), 0) + var tmp2 complex64 + jx := ix + jy := iy + for k := kk + 1; k < kk+n-i; k++ { + jx += incX + jy += incY + y[jy] += tmp1 * cmplx.Conj(ap[k]) + tmp2 += ap[k] * x[jx] + } + y[iy] += alpha * tmp2 + ix += incX + iy += incY + kk += n - i + } + } + return + } + + // Form y when ap contains the lower triangle. + // Here, kk points to the beginning of current row in ap. + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + tmp1 := alpha * x[i] + var tmp2 complex64 + k := kk + for j := 0; j < i; j++ { + y[j] += tmp1 * cmplx.Conj(ap[k]) + tmp2 += ap[k] * x[j] + k++ + } + aii := complex(real(ap[kk+i]), 0) + y[i] += tmp1*aii + alpha*tmp2 + kk += i + 1 + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + tmp1 := alpha * x[ix] + var tmp2 complex64 + jx := kx + jy := ky + for k := kk; k < kk+i; k++ { + y[jy] += tmp1 * cmplx.Conj(ap[k]) + tmp2 += ap[k] * x[jx] + jx += incX + jy += incY + } + aii := complex(real(ap[kk+i]), 0) + y[iy] += tmp1*aii + alpha*tmp2 + ix += incX + iy += incY + kk += i + 1 + } + } +} + +// Chpr performs the Hermitian rank-1 operation +// +// A += alpha * x * xᴴ +// +// where alpha is a real scalar, x is a vector, and A is an n×n hermitian matrix +// in packed form. On entry, the imaginary parts of the diagonal elements are +// assumed to be zero, and on return they are set to zero. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Chpr(uplo blas.Uplo, n int, alpha float32, x []complex64, incX int, ap []complex64) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + // Set up start index in X. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + // The elements of A are accessed sequentially with one pass through ap. + + var kk int + if uplo == blas.Upper { + // Form A when upper triangle is stored in AP. + // Here, kk points to the current diagonal element in ap. 
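+ // Row i of the packed upper triangle occupies ap[kk : kk+n-i], diagonal + // first, so kk advances by n-i after each row.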
+ if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + if xi != 0 { + aii := real(ap[kk]) + alpha*real(cmplx.Conj(xi)*xi) + ap[kk] = complex(aii, 0) + + tmp := complex(alpha, 0) * xi + a := ap[kk+1 : kk+n-i] + x := x[i+1 : n] + for j, v := range x { + a[j] += tmp * cmplx.Conj(v) + } + } else { + ap[kk] = complex(real(ap[kk]), 0) + } + kk += n - i + } + } else { + ix := kx + for i := 0; i < n; i++ { + xi := x[ix] + if xi != 0 { + aii := real(ap[kk]) + alpha*real(cmplx.Conj(xi)*xi) + ap[kk] = complex(aii, 0) + + tmp := complex(alpha, 0) * xi + jx := ix + incX + a := ap[kk+1 : kk+n-i] + for k := range a { + a[k] += tmp * cmplx.Conj(x[jx]) + jx += incX + } + } else { + ap[kk] = complex(real(ap[kk]), 0) + } + ix += incX + kk += n - i + } + } + return + } + + // Form A when lower triangle is stored in AP. + // Here, kk points to the beginning of current row in ap. + if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + if xi != 0 { + tmp := complex(alpha, 0) * xi + a := ap[kk : kk+i] + for j, v := range x[:i] { + a[j] += tmp * cmplx.Conj(v) + } + + aii := real(ap[kk+i]) + alpha*real(cmplx.Conj(xi)*xi) + ap[kk+i] = complex(aii, 0) + } else { + ap[kk+i] = complex(real(ap[kk+i]), 0) + } + kk += i + 1 + } + } else { + ix := kx + for i := 0; i < n; i++ { + xi := x[ix] + if xi != 0 { + tmp := complex(alpha, 0) * xi + a := ap[kk : kk+i] + jx := kx + for k := range a { + a[k] += tmp * cmplx.Conj(x[jx]) + jx += incX + } + + aii := real(ap[kk+i]) + alpha*real(cmplx.Conj(xi)*xi) + ap[kk+i] = complex(aii, 0) + } else { + ap[kk+i] = complex(real(ap[kk+i]), 0) + } + ix += incX + kk += i + 1 + } + } +} + +// Chpr2 performs the Hermitian rank-2 operation +// +// A += alpha * x * yᴴ + conj(alpha) * y * xᴴ +// +// where alpha is a complex scalar, x and y are n element vectors, and A is an +// n×n Hermitian matrix, supplied in packed form. On entry, the imaginary parts +// of the diagonal elements are assumed to be zero, and on return they are set to zero. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Chpr2(uplo blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ap []complex64) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + // Set up start indices in X and Y. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + var ky int + if incY < 0 { + ky = (1 - n) * incY + } + + // The elements of A are accessed sequentially with one pass through ap. + + var kk int + if uplo == blas.Upper { + // Form A when upper triangle is stored in AP. + // Here, kk points to the current diagonal element in ap. 
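+ // Only the real part of each updated diagonal element is stored, + // enforcing the Hermitian requirement that the diagonal be real.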
+ if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 || y[i] != 0 { + tmp1 := alpha * x[i] + tmp2 := cmplx.Conj(alpha) * y[i] + aii := real(ap[kk]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i])) + ap[kk] = complex(aii, 0) + k := kk + 1 + for j := i + 1; j < n; j++ { + ap[k] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j]) + k++ + } + } else { + ap[kk] = complex(real(ap[kk]), 0) + } + kk += n - i + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + if x[ix] != 0 || y[iy] != 0 { + tmp1 := alpha * x[ix] + tmp2 := cmplx.Conj(alpha) * y[iy] + aii := real(ap[kk]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix])) + ap[kk] = complex(aii, 0) + jx := ix + incX + jy := iy + incY + for k := kk + 1; k < kk+n-i; k++ { + ap[k] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx]) + jx += incX + jy += incY + } + } else { + ap[kk] = complex(real(ap[kk]), 0) + } + ix += incX + iy += incY + kk += n - i + } + } + return + } + + // Form A when lower triangle is stored in AP. + // Here, kk points to the beginning of current row in ap. + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + if x[i] != 0 || y[i] != 0 { + tmp1 := alpha * x[i] + tmp2 := cmplx.Conj(alpha) * y[i] + k := kk + for j := 0; j < i; j++ { + ap[k] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j]) + k++ + } + aii := real(ap[kk+i]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i])) + ap[kk+i] = complex(aii, 0) + } else { + ap[kk+i] = complex(real(ap[kk+i]), 0) + } + kk += i + 1 + } + } else { + ix := kx + iy := ky + for i := 0; i < n; i++ { + if x[ix] != 0 || y[iy] != 0 { + tmp1 := alpha * x[ix] + tmp2 := cmplx.Conj(alpha) * y[iy] + jx := kx + jy := ky + for k := kk; k < kk+i; k++ { + ap[k] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx]) + jx += incX + jy += incY + } + aii := real(ap[kk+i]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix])) + ap[kk+i] = complex(aii, 0) + } else { + ap[kk+i] = complex(real(ap[kk+i]), 0) + } + ix += incX + iy += incY + kk += i + 1 + } + } +} + +// Ctbmv performs one of the matrix-vector operations +// +// x = A * x if trans = blas.NoTrans +// x = Aᵀ * x if trans = blas.Trans +// x = Aᴴ * x if trans = blas.ConjTrans +// +// where x is an n element vector and A is an n×n triangular band matrix, with +// (k+1) diagonals. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Ctbmv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. 
+ var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + switch trans { + case blas.NoTrans: + if uplo == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + if diag == blas.NonUnit { + xi *= a[i*lda] + } + kk := min(k, n-i-1) + for j, aij := range a[i*lda+1 : i*lda+kk+1] { + xi += x[i+j+1] * aij + } + x[i] = xi + } + } else { + ix := kx + for i := 0; i < n; i++ { + xi := x[ix] + if diag == blas.NonUnit { + xi *= a[i*lda] + } + kk := min(k, n-i-1) + jx := ix + incX + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + xi += x[jx] * aij + jx += incX + } + x[ix] = xi + ix += incX + } + } + } else { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + xi := x[i] + if diag == blas.NonUnit { + xi *= a[i*lda+k] + } + kk := min(k, i) + for j, aij := range a[i*lda+k-kk : i*lda+k] { + xi += x[i-kk+j] * aij + } + x[i] = xi + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + xi := x[ix] + if diag == blas.NonUnit { + xi *= a[i*lda+k] + } + kk := min(k, i) + jx := ix - kk*incX + for _, aij := range a[i*lda+k-kk : i*lda+k] { + xi += x[jx] * aij + jx += incX + } + x[ix] = xi + ix -= incX + } + } + } + case blas.Trans: + if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + xi := x[i] + for j, aij := range a[i*lda+1 : i*lda+kk+1] { + x[i+j+1] += xi * aij + } + if diag == blas.NonUnit { + x[i] *= a[i*lda] + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + jx := ix + incX + xi := x[ix] + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + x[jx] += xi * aij + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= a[i*lda] + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + kk := min(k, i) + xi := x[i] + for j, aij := range a[i*lda+k-kk : i*lda+k] { + x[i-kk+j] += xi * aij + } + if diag == blas.NonUnit { + x[i] *= a[i*lda+k] + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + kk := min(k, i) + jx := ix - kk*incX + xi := x[ix] + for _, aij := range a[i*lda+k-kk : i*lda+k] { + x[jx] += xi * aij + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= a[i*lda+k] + } + ix += incX + } + } + } + case blas.ConjTrans: + if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + xi := x[i] + for j, aij := range a[i*lda+1 : i*lda+kk+1] { + x[i+j+1] += xi * cmplx.Conj(aij) + } + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(a[i*lda]) + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + jx := ix + incX + xi := x[ix] + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + x[jx] += xi * cmplx.Conj(aij) + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(a[i*lda]) + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + kk := min(k, i) + xi := x[i] + for j, aij := range a[i*lda+k-kk : i*lda+k] { + x[i-kk+j] += xi * cmplx.Conj(aij) + } + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(a[i*lda+k]) + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + kk := min(k, i) + jx := ix - kk*incX + xi := x[ix] + for _, aij := range a[i*lda+k-kk : i*lda+k] { + x[jx] += xi * cmplx.Conj(aij) + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(a[i*lda+k]) + } + ix += incX + } + } + } + } +} + +// Ctbsv solves one of the systems of equations +// +// A * x = b if trans == blas.NoTrans +// Aᵀ * x = b if trans == blas.Trans +// Aᴴ * x = b if trans == blas.ConjTrans +// +// where b and x are n element vectors and A is an n×n triangular band matrix +// 
with (k+1) diagonals. +// +// On entry, x contains the values of b, and the solution is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Ctbsv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + switch trans { + case blas.NoTrans: + if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + var sum complex64 + for j, aij := range a[i*lda+1 : i*lda+kk+1] { + sum += x[i+1+j] * aij + } + x[i] -= sum + if diag == blas.NonUnit { + x[i] /= a[i*lda] + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + kk := min(k, n-i-1) + var sum complex64 + jx := ix + incX + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + sum += x[jx] * aij + jx += incX + } + x[ix] -= sum + if diag == blas.NonUnit { + x[ix] /= a[i*lda] + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + kk := min(k, i) + var sum complex64 + for j, aij := range a[i*lda+k-kk : i*lda+k] { + sum += x[i-kk+j] * aij + } + x[i] -= sum + if diag == blas.NonUnit { + x[i] /= a[i*lda+k] + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + kk := min(k, i) + var sum complex64 + jx := ix - kk*incX + for _, aij := range a[i*lda+k-kk : i*lda+k] { + sum += x[jx] * aij + jx += incX + } + x[ix] -= sum + if diag == blas.NonUnit { + x[ix] /= a[i*lda+k] + } + ix += incX + } + } + } + case blas.Trans: + if uplo == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[i] /= a[i*lda] + } + kk := min(k, n-i-1) + xi := x[i] + for j, aij := range a[i*lda+1 : i*lda+kk+1] { + x[i+1+j] -= xi * aij + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[ix] /= a[i*lda] + } + kk := min(k, n-i-1) + xi := x[ix] + jx := ix + incX + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + x[jx] -= xi * aij + jx += incX + } + ix += incX + } + } + } else { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[i] /= a[i*lda+k] + } + kk := min(k, i) + xi := x[i] + for j, aij := range a[i*lda+k-kk : i*lda+k] { + x[i-kk+j] -= xi * aij + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[ix] /= a[i*lda+k] + } + kk := min(k, i) + xi := x[ix] + jx := ix - kk*incX + for _, aij := range a[i*lda+k-kk : i*lda+k] { + x[jx] -= xi * aij + jx += incX + } + ix -= incX + } + } + } + case blas.ConjTrans: + if uplo == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[i] /= 
cmplx.Conj(a[i*lda]) + } + kk := min(k, n-i-1) + xi := x[i] + for j, aij := range a[i*lda+1 : i*lda+kk+1] { + x[i+1+j] -= xi * cmplx.Conj(aij) + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[ix] /= cmplx.Conj(a[i*lda]) + } + kk := min(k, n-i-1) + xi := x[ix] + jx := ix + incX + for _, aij := range a[i*lda+1 : i*lda+kk+1] { + x[jx] -= xi * cmplx.Conj(aij) + jx += incX + } + ix += incX + } + } + } else { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[i] /= cmplx.Conj(a[i*lda+k]) + } + kk := min(k, i) + xi := x[i] + for j, aij := range a[i*lda+k-kk : i*lda+k] { + x[i-kk+j] -= xi * cmplx.Conj(aij) + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[ix] /= cmplx.Conj(a[i*lda+k]) + } + kk := min(k, i) + xi := x[ix] + jx := ix - kk*incX + for _, aij := range a[i*lda+k-kk : i*lda+k] { + x[jx] -= xi * cmplx.Conj(aij) + jx += incX + } + ix -= incX + } + } + } + } +} + +// Ctpmv performs one of the matrix-vector operations +// +// x = A * x if trans = blas.NoTrans +// x = Aᵀ * x if trans = blas.Trans +// x = Aᴴ * x if trans = blas.ConjTrans +// +// where x is an n element vector and A is an n×n triangular matrix, supplied in +// packed form. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Ctpmv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n int, ap []complex64, x []complex64, incX int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + // The elements of A are accessed sequentially with one pass through A. + + if trans == blas.NoTrans { + // Form x = A*x. + if uplo == blas.Upper { + // kk points to the current diagonal element in ap. + kk := 0 + if incX == 1 { + x = x[:n] + for i := range x { + if diag == blas.NonUnit { + x[i] *= ap[kk] + } + if n-i-1 > 0 { + x[i] += c64.DotuUnitary(ap[kk+1:kk+n-i], x[i+1:]) + } + kk += n - i + } + } else { + ix := kx + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[ix] *= ap[kk] + } + if n-i-1 > 0 { + x[ix] += c64.DotuInc(ap[kk+1:kk+n-i], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX)) + } + ix += incX + kk += n - i + } + } + } else { + // kk points to the beginning of current row in ap. + kk := n*(n+1)/2 - n + if incX == 1 { + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[i] *= ap[kk+i] + } + if i > 0 { + x[i] += c64.DotuUnitary(ap[kk:kk+i], x[:i]) + } + kk -= i + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[ix] *= ap[kk+i] + } + if i > 0 { + x[ix] += c64.DotuInc(ap[kk:kk+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) + } + ix -= incX + kk -= i + } + } + } + return + } + + if trans == blas.Trans { + // Form x = Aᵀ*x. + if uplo == blas.Upper { + // kk points to the current diagonal element in ap. 
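+	// In row-major upper packed storage the n = 3 triangle is laid out as
+	// ap = [a00, a01, a02, a11, a12, a22]: the final element is the last
+	// diagonal, and stepping up one row moves kk back by n-i+1.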
+ kk := n*(n+1)/2 - 1 + if incX == 1 { + for i := n - 1; i >= 0; i-- { + xi := x[i] + if diag == blas.NonUnit { + x[i] *= ap[kk] + } + if n-i-1 > 0 { + c64.AxpyUnitary(xi, ap[kk+1:kk+n-i], x[i+1:n]) + } + kk -= n - i + 1 + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + xi := x[ix] + if diag == blas.NonUnit { + x[ix] *= ap[kk] + } + if n-i-1 > 0 { + c64.AxpyInc(xi, ap[kk+1:kk+n-i], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX)) + } + ix -= incX + kk -= n - i + 1 + } + } + } else { + // kk points to the beginning of current row in ap. + kk := 0 + if incX == 1 { + x = x[:n] + for i := range x { + if i > 0 { + c64.AxpyUnitary(x[i], ap[kk:kk+i], x[:i]) + } + if diag == blas.NonUnit { + x[i] *= ap[kk+i] + } + kk += i + 1 + } + } else { + ix := kx + for i := 0; i < n; i++ { + if i > 0 { + c64.AxpyInc(x[ix], ap[kk:kk+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) + } + if diag == blas.NonUnit { + x[ix] *= ap[kk+i] + } + ix += incX + kk += i + 1 + } + } + } + return + } + + // Form x = Aᴴ*x. + if uplo == blas.Upper { + // kk points to the current diagonal element in ap. + kk := n*(n+1)/2 - 1 + if incX == 1 { + for i := n - 1; i >= 0; i-- { + xi := x[i] + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(ap[kk]) + } + k := kk + 1 + for j := i + 1; j < n; j++ { + x[j] += xi * cmplx.Conj(ap[k]) + k++ + } + kk -= n - i + 1 + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + xi := x[ix] + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(ap[kk]) + } + jx := ix + incX + k := kk + 1 + for j := i + 1; j < n; j++ { + x[jx] += xi * cmplx.Conj(ap[k]) + jx += incX + k++ + } + ix -= incX + kk -= n - i + 1 + } + } + } else { + // kk points to the beginning of current row in ap. + kk := 0 + if incX == 1 { + x = x[:n] + for i, xi := range x { + for j := 0; j < i; j++ { + x[j] += xi * cmplx.Conj(ap[kk+j]) + } + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(ap[kk+i]) + } + kk += i + 1 + } + } else { + ix := kx + for i := 0; i < n; i++ { + xi := x[ix] + jx := kx + for j := 0; j < i; j++ { + x[jx] += xi * cmplx.Conj(ap[kk+j]) + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(ap[kk+i]) + } + ix += incX + kk += i + 1 + } + } + } +} + +// Ctpsv solves one of the systems of equations +// +// A * x = b if trans == blas.NoTrans +// Aᵀ * x = b if trans == blas.Trans +// Aᴴ * x = b if trans == blas.ConjTrans +// +// where b and x are n element vectors and A is an n×n triangular matrix in +// packed form. +// +// On entry, x contains the values of b, and the solution is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Ctpsv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n int, ap []complex64, x []complex64, incX int) { + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
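+	// A packed triangular matrix stores exactly n*(n+1)/2 elements, one
+	// per entry of the stored triangle.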
+ if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + // The elements of A are accessed sequentially with one pass through ap. + + if trans == blas.NoTrans { + // Form x = inv(A)*x. + if uplo == blas.Upper { + kk := n*(n+1)/2 - 1 + if incX == 1 { + for i := n - 1; i >= 0; i-- { + aii := ap[kk] + if n-i-1 > 0 { + x[i] -= c64.DotuUnitary(x[i+1:n], ap[kk+1:kk+n-i]) + } + if diag == blas.NonUnit { + x[i] /= aii + } + kk -= n - i + 1 + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + aii := ap[kk] + if n-i-1 > 0 { + x[ix] -= c64.DotuInc(x, ap[kk+1:kk+n-i], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) + } + if diag == blas.NonUnit { + x[ix] /= aii + } + ix -= incX + kk -= n - i + 1 + } + } + } else { + kk := 0 + if incX == 1 { + for i := 0; i < n; i++ { + if i > 0 { + x[i] -= c64.DotuUnitary(x[:i], ap[kk:kk+i]) + } + if diag == blas.NonUnit { + x[i] /= ap[kk+i] + } + kk += i + 1 + } + } else { + ix := kx + for i := 0; i < n; i++ { + if i > 0 { + x[ix] -= c64.DotuInc(x, ap[kk:kk+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) + } + if diag == blas.NonUnit { + x[ix] /= ap[kk+i] + } + ix += incX + kk += i + 1 + } + } + } + return + } + + if trans == blas.Trans { + // Form x = inv(Aᵀ)*x. + if uplo == blas.Upper { + kk := 0 + if incX == 1 { + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[j] /= ap[kk] + } + if n-j-1 > 0 { + c64.AxpyUnitary(-x[j], ap[kk+1:kk+n-j], x[j+1:n]) + } + kk += n - j + } + } else { + jx := kx + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[jx] /= ap[kk] + } + if n-j-1 > 0 { + c64.AxpyInc(-x[jx], ap[kk+1:kk+n-j], x, uintptr(n-j-1), 1, uintptr(incX), 0, uintptr(jx+incX)) + } + jx += incX + kk += n - j + } + } + } else { + kk := n*(n+1)/2 - n + if incX == 1 { + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[j] /= ap[kk+j] + } + if j > 0 { + c64.AxpyUnitary(-x[j], ap[kk:kk+j], x[:j]) + } + kk -= j + } + } else { + jx := kx + (n-1)*incX + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[jx] /= ap[kk+j] + } + if j > 0 { + c64.AxpyInc(-x[jx], ap[kk:kk+j], x, uintptr(j), 1, uintptr(incX), 0, uintptr(kx)) + } + jx -= incX + kk -= j + } + } + } + return + } + + // Form x = inv(Aᴴ)*x. 
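+	// Aᴴ of an upper-triangular matrix is lower triangular, so for
+	// blas.Upper the solve runs forward: once x[j] is final it is
+	// eliminated from every later equation using conjugated row j.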
+ if uplo == blas.Upper { + kk := 0 + if incX == 1 { + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[j] /= cmplx.Conj(ap[kk]) + } + xj := x[j] + k := kk + 1 + for i := j + 1; i < n; i++ { + x[i] -= xj * cmplx.Conj(ap[k]) + k++ + } + kk += n - j + } + } else { + jx := kx + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[jx] /= cmplx.Conj(ap[kk]) + } + xj := x[jx] + ix := jx + incX + k := kk + 1 + for i := j + 1; i < n; i++ { + x[ix] -= xj * cmplx.Conj(ap[k]) + ix += incX + k++ + } + jx += incX + kk += n - j + } + } + } else { + kk := n*(n+1)/2 - n + if incX == 1 { + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[j] /= cmplx.Conj(ap[kk+j]) + } + xj := x[j] + for i := 0; i < j; i++ { + x[i] -= xj * cmplx.Conj(ap[kk+i]) + } + kk -= j + } + } else { + jx := kx + (n-1)*incX + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[jx] /= cmplx.Conj(ap[kk+j]) + } + xj := x[jx] + ix := kx + for i := 0; i < j; i++ { + x[ix] -= xj * cmplx.Conj(ap[kk+i]) + ix += incX + } + jx -= incX + kk -= j + } + } + } +} + +// Ctrmv performs one of the matrix-vector operations +// +// x = A * x if trans = blas.NoTrans +// x = Aᵀ * x if trans = blas.Trans +// x = Aᴴ * x if trans = blas.ConjTrans +// +// where x is a vector, and A is an n×n triangular matrix. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Ctrmv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + // The elements of A are accessed sequentially with one pass through A. + + if trans == blas.NoTrans { + // Form x = A*x. + if uplo == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[i] *= a[i*lda+i] + } + if n-i-1 > 0 { + x[i] += c64.DotuUnitary(a[i*lda+i+1:i*lda+n], x[i+1:n]) + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + if diag == blas.NonUnit { + x[ix] *= a[i*lda+i] + } + if n-i-1 > 0 { + x[ix] += c64.DotuInc(a[i*lda+i+1:i*lda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX)) + } + ix += incX + } + } + } else { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[i] *= a[i*lda+i] + } + if i > 0 { + x[i] += c64.DotuUnitary(a[i*lda:i*lda+i], x[:i]) + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + if diag == blas.NonUnit { + x[ix] *= a[i*lda+i] + } + if i > 0 { + x[ix] += c64.DotuInc(a[i*lda:i*lda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) + } + ix -= incX + } + } + } + return + } + + if trans == blas.Trans { + // Form x = Aᵀ*x. 
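+	// The transposed product is built from axpy updates: each x[i] is
+	// scattered into the entries of x it influences, so A is still read
+	// row by row in a single pass.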
+ if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + xi := x[i] + if diag == blas.NonUnit { + x[i] *= a[i*lda+i] + } + if n-i-1 > 0 { + c64.AxpyUnitary(xi, a[i*lda+i+1:i*lda+n], x[i+1:n]) + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + xi := x[ix] + if diag == blas.NonUnit { + x[ix] *= a[i*lda+i] + } + if n-i-1 > 0 { + c64.AxpyInc(xi, a[i*lda+i+1:i*lda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX)) + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + if i > 0 { + c64.AxpyUnitary(x[i], a[i*lda:i*lda+i], x[:i]) + } + if diag == blas.NonUnit { + x[i] *= a[i*lda+i] + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + if i > 0 { + c64.AxpyInc(x[ix], a[i*lda:i*lda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) + } + if diag == blas.NonUnit { + x[ix] *= a[i*lda+i] + } + ix += incX + } + } + } + return + } + + // Form x = Aᴴ*x. + if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + xi := x[i] + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(a[i*lda+i]) + } + for j := i + 1; j < n; j++ { + x[j] += xi * cmplx.Conj(a[i*lda+j]) + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + xi := x[ix] + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(a[i*lda+i]) + } + jx := ix + incX + for j := i + 1; j < n; j++ { + x[jx] += xi * cmplx.Conj(a[i*lda+j]) + jx += incX + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + for j := 0; j < i; j++ { + x[j] += x[i] * cmplx.Conj(a[i*lda+j]) + } + if diag == blas.NonUnit { + x[i] *= cmplx.Conj(a[i*lda+i]) + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + jx := kx + for j := 0; j < i; j++ { + x[jx] += x[ix] * cmplx.Conj(a[i*lda+j]) + jx += incX + } + if diag == blas.NonUnit { + x[ix] *= cmplx.Conj(a[i*lda+i]) + } + ix += incX + } + } + } +} + +// Ctrsv solves one of the systems of equations +// +// A * x = b if trans == blas.NoTrans +// Aᵀ * x = b if trans == blas.Trans +// Aᴴ * x = b if trans == blas.ConjTrans +// +// where b and x are n element vectors and A is an n×n triangular matrix. +// +// On entry, x contains the values of b, and the solution is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Ctrsv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) { + switch trans { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch uplo { + default: + panic(badUplo) + case blas.Upper, blas.Lower: + } + switch diag { + default: + panic(badDiag) + case blas.NonUnit, blas.Unit: + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + // Set up start index in X. + var kx int + if incX < 0 { + kx = (1 - n) * incX + } + + // The elements of A are accessed sequentially with one pass through A. + + if trans == blas.NoTrans { + // Form x = inv(A)*x. 
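+	// For blas.Upper this is back substitution: rows are solved from the
+	// last one upward, each subtracting its dot product with the
+	// already-solved tail of x before dividing by the diagonal.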
+ if uplo == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + aii := a[i*lda+i] + if n-i-1 > 0 { + x[i] -= c64.DotuUnitary(x[i+1:n], a[i*lda+i+1:i*lda+n]) + } + if diag == blas.NonUnit { + x[i] /= aii + } + } + } else { + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + aii := a[i*lda+i] + if n-i-1 > 0 { + x[ix] -= c64.DotuInc(x, a[i*lda+i+1:i*lda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) + } + if diag == blas.NonUnit { + x[ix] /= aii + } + ix -= incX + } + } + } else { + if incX == 1 { + for i := 0; i < n; i++ { + if i > 0 { + x[i] -= c64.DotuUnitary(x[:i], a[i*lda:i*lda+i]) + } + if diag == blas.NonUnit { + x[i] /= a[i*lda+i] + } + } + } else { + ix := kx + for i := 0; i < n; i++ { + if i > 0 { + x[ix] -= c64.DotuInc(x, a[i*lda:i*lda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) + } + if diag == blas.NonUnit { + x[ix] /= a[i*lda+i] + } + ix += incX + } + } + } + return + } + + if trans == blas.Trans { + // Form x = inv(Aᵀ)*x. + if uplo == blas.Upper { + if incX == 1 { + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[j] /= a[j*lda+j] + } + if n-j-1 > 0 { + c64.AxpyUnitary(-x[j], a[j*lda+j+1:j*lda+n], x[j+1:n]) + } + } + } else { + jx := kx + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[jx] /= a[j*lda+j] + } + if n-j-1 > 0 { + c64.AxpyInc(-x[jx], a[j*lda+j+1:j*lda+n], x, uintptr(n-j-1), 1, uintptr(incX), 0, uintptr(jx+incX)) + } + jx += incX + } + } + } else { + if incX == 1 { + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[j] /= a[j*lda+j] + } + xj := x[j] + if j > 0 { + c64.AxpyUnitary(-xj, a[j*lda:j*lda+j], x[:j]) + } + } + } else { + jx := kx + (n-1)*incX + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[jx] /= a[j*lda+j] + } + if j > 0 { + c64.AxpyInc(-x[jx], a[j*lda:j*lda+j], x, uintptr(j), 1, uintptr(incX), 0, uintptr(kx)) + } + jx -= incX + } + } + } + return + } + + // Form x = inv(Aᴴ)*x. + if uplo == blas.Upper { + if incX == 1 { + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[j] /= cmplx.Conj(a[j*lda+j]) + } + xj := x[j] + for i := j + 1; i < n; i++ { + x[i] -= xj * cmplx.Conj(a[j*lda+i]) + } + } + } else { + jx := kx + for j := 0; j < n; j++ { + if diag == blas.NonUnit { + x[jx] /= cmplx.Conj(a[j*lda+j]) + } + xj := x[jx] + ix := jx + incX + for i := j + 1; i < n; i++ { + x[ix] -= xj * cmplx.Conj(a[j*lda+i]) + ix += incX + } + jx += incX + } + } + } else { + if incX == 1 { + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[j] /= cmplx.Conj(a[j*lda+j]) + } + xj := x[j] + for i := 0; i < j; i++ { + x[i] -= xj * cmplx.Conj(a[j*lda+i]) + } + } + } else { + jx := kx + (n-1)*incX + for j := n - 1; j >= 0; j-- { + if diag == blas.NonUnit { + x[jx] /= cmplx.Conj(a[j*lda+j]) + } + xj := x[jx] + ix := kx + for i := 0; i < j; i++ { + x[ix] -= xj * cmplx.Conj(a[j*lda+i]) + ix += incX + } + jx -= incX + } + } + } +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level2float32.go b/vendor/gonum.org/v1/gonum/blas/gonum/level2float32.go new file mode 100644 index 0000000000..26e4959d7f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level2float32.go @@ -0,0 +1,2400 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. + +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/f32" +) + +var _ blas.Float32Level2 = Implementation{} + +// Sger performs the rank-one operation +// +// A += alpha * x * yᵀ +// +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(a) < lda*(m-1)+n { + panic(shortA) + } + + // Quick return if possible. + if alpha == 0 { + return + } + f32.Ger(uintptr(m), uintptr(n), + alpha, + x, uintptr(incX), + y, uintptr(incY), + a, uintptr(lda)) +} + +// Sgbmv performs one of the matrix-vector operations +// +// y = alpha * A * x + beta * y if tA == blas.NoTrans +// y = alpha * Aᵀ * x + beta * y if tA == blas.Trans or blas.ConjTrans +// +// where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals, +// x and y are vectors, and alpha and beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if kL < 0 { + panic(kLLT0) + } + if kU < 0 { + panic(kULT0) + } + if lda < kL+kU+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 { + panic(shortA) + } + lenX := m + lenY := n + if tA == blas.NoTrans { + lenX = n + lenY = m + } + if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + var kx, ky int + if incX < 0 { + kx = -(lenX - 1) * incX + } + if incY < 0 { + ky = -(lenY - 1) * incY + } + + // Form y = beta * y. + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:lenY] { + y[i] = 0 + } + } else { + f32.ScalUnitary(beta, y[:lenY]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < lenY; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + f32.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) + } else { + f32.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) + } + } + } + } + + if alpha == 0 { + return + } + + // i and j are indices of the compacted banded matrix. 
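+	// Row i of the compact matrix holds the entries of dense row i from
+	// column i-kL through i+kU, clipped to the matrix bounds.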
+ // off is the offset into the dense matrix (off + j = densej) + nCol := kU + 1 + kL + if tA == blas.NoTrans { + iy := ky + if incX == 1 { + for i := 0; i < min(m, n+kL); i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + xtmp := x[off : off+u-l] + var sum float32 + for j, v := range atmp { + sum += xtmp[j] * v + } + y[iy] += sum * alpha + iy += incY + } + return + } + for i := 0; i < min(m, n+kL); i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + jx := kx + var sum float32 + for _, v := range atmp { + sum += x[off*incX+jx] * v + jx += incX + } + y[iy] += sum * alpha + iy += incY + } + return + } + if incX == 1 { + for i := 0; i < min(m, n+kL); i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + tmp := alpha * x[i] + jy := ky + for _, v := range atmp { + y[jy+off*incY] += tmp * v + jy += incY + } + } + return + } + ix := kx + for i := 0; i < min(m, n+kL); i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + tmp := alpha * x[ix] + jy := ky + for _, v := range atmp { + y[jy+off*incY] += tmp * v + jy += incY + } + ix += incX + } +} + +// Sgemv computes +// +// y = alpha * A * x + beta * y if tA = blas.NoTrans +// y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans +// +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + // Set up indexes + lenX := m + lenY := n + if tA == blas.NoTrans { + lenX = n + lenY = m + } + + // Quick return if possible + if m == 0 || n == 0 { + return + } + + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic(shortX) + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic(shortY) + } + if len(a) < lda*(m-1)+n { + panic(shortA) + } + + // Quick return if possible + if alpha == 0 && beta == 1 { + return + } + + if alpha == 0 { + // First form y = beta * y + if incY > 0 { + Implementation{}.Sscal(lenY, beta, y, incY) + } else { + Implementation{}.Sscal(lenY, beta, y, -incY) + } + return + } + + // Form y = alpha * A * x + y + if tA == blas.NoTrans { + f32.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) + return + } + // Cases where a is transposed. + f32.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) +} + +// Strmv performs one of the matrix-vector operations +// +// x = A * x if tA == blas.NoTrans +// x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular matrix, and x is a vector. +// +// Float32 implementations are autogenerated and not directly tested. 
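+//
+// A minimal call sketch (hypothetical 2×2 data, row-major, lda = n):
+//
+//	var impl Implementation
+//	a := []float32{2, 1, 0, 3} // upper triangle of A; the 0 is never read
+//	x := []float32{4, 5}
+//	impl.Strmv(blas.Upper, blas.NoTrans, blas.NonUnit, 2, a, 2, x, 1)
+//	// x is now A*x = {2*4 + 1*5, 3*5} = {13, 15}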
+func (Implementation) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + nonUnit := d != blas.Unit + if n == 1 { + if nonUnit { + x[0] *= a[0] + } + return + } + var kx int + if incX <= 0 { + kx = -(n - 1) * incX + } + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + ilda := i * lda + var tmp float32 + if nonUnit { + tmp = a[ilda+i] * x[i] + } else { + tmp = x[i] + } + x[i] = tmp + f32.DotUnitary(a[ilda+i+1:ilda+n], x[i+1:n]) + } + return + } + ix := kx + for i := 0; i < n; i++ { + ilda := i * lda + var tmp float32 + if nonUnit { + tmp = a[ilda+i] * x[ix] + } else { + tmp = x[ix] + } + x[ix] = tmp + f32.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + ilda := i * lda + var tmp float32 + if nonUnit { + tmp += a[ilda+i] * x[i] + } else { + tmp = x[i] + } + x[i] = tmp + f32.DotUnitary(a[ilda:ilda+i], x[:i]) + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + ilda := i * lda + var tmp float32 + if nonUnit { + tmp = a[ilda+i] * x[ix] + } else { + tmp = x[ix] + } + x[ix] = tmp + f32.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) + ix -= incX + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + ilda := i * lda + xi := x[i] + f32.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n]) + if nonUnit { + x[i] *= a[ilda+i] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + ilda := i * lda + xi := x[ix] + f32.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX)) + if nonUnit { + x[ix] *= a[ilda+i] + } + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + ilda := i * lda + xi := x[i] + f32.AxpyUnitary(xi, a[ilda:ilda+i], x[:i]) + if nonUnit { + x[i] *= a[i*lda+i] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + ilda := i * lda + xi := x[ix] + f32.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) + if nonUnit { + x[ix] *= a[ilda+i] + } + ix += incX + } +} + +// Strsv solves one of the systems of equations +// +// A * x = b if tA == blas.NoTrans +// Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular matrix, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +// +// Float32 implementations are autogenerated and not directly tested. 
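+//
+// A minimal call sketch (hypothetical data; this undoes the Strmv
+// example above):
+//
+//	var impl Implementation
+//	a := []float32{2, 1, 0, 3}
+//	x := []float32{13, 15} // holds b on entry
+//	impl.Strsv(blas.Upper, blas.NoTrans, blas.NonUnit, 2, a, 2, x, 1)
+//	// x now holds the solution {4, 5}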
+func (Implementation) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + if n == 1 { + if d == blas.NonUnit { + x[0] /= a[0] + } + return + } + + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } + nonUnit := d == blas.NonUnit + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + var sum float32 + atmp := a[i*lda+i+1 : i*lda+n] + for j, v := range atmp { + jv := i + j + 1 + sum += x[jv] * v + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda+i] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + var sum float32 + jx := ix + incX + atmp := a[i*lda+i+1 : i*lda+n] + for _, v := range atmp { + sum += x[jx] * v + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda+i] + } + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + var sum float32 + atmp := a[i*lda : i*lda+i] + for j, v := range atmp { + sum += x[j] * v + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda+i] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + var sum float32 + atmp := a[i*lda : i*lda+i] + for _, v := range atmp { + sum += x[jx] * v + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda+i] + } + ix += incX + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if nonUnit { + x[i] /= a[i*lda+i] + } + xi := x[i] + atmp := a[i*lda+i+1 : i*lda+n] + for j, v := range atmp { + jv := j + i + 1 + x[jv] -= v * xi + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + if nonUnit { + x[ix] /= a[i*lda+i] + } + xi := x[ix] + jx := kx + (i+1)*incX + atmp := a[i*lda+i+1 : i*lda+n] + for _, v := range atmp { + x[jx] -= v * xi + jx += incX + } + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[i] /= a[i*lda+i] + } + xi := x[i] + atmp := a[i*lda : i*lda+i] + for j, v := range atmp { + x[j] -= v * xi + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[ix] /= a[i*lda+i] + } + xi := x[ix] + jx := kx + atmp := a[i*lda : i*lda+i] + for _, v := range atmp { + x[jx] -= v * xi + jx += incX + } + ix -= incX + } +} + +// Ssymv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where A is an n×n symmetric matrix, x and y are vectors, and alpha and +// beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. 
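+	// (A zero-sized problem does no work and returns before the slice
+	// length checks below ever examine a, x, or y.)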
+ if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up start points + var kx, ky int + if incX < 0 { + kx = -(n - 1) * incX + } + if incY < 0 { + ky = -(n - 1) * incY + } + + // Form y = beta * y + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + f32.ScalUnitary(beta, y[:n]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) + } else { + f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) + } + } + } + } + + if alpha == 0 { + return + } + + if n == 1 { + y[0] += alpha * a[0] * x[0] + return + } + + if ul == blas.Upper { + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + xv := x[i] * alpha + sum := x[i] * a[i*lda+i] + jy := ky + (i+1)*incY + atmp := a[i*lda+i+1 : i*lda+n] + for j, v := range atmp { + jp := j + i + 1 + sum += x[jp] * v + y[jy] += xv * v + jy += incY + } + y[iy] += alpha * sum + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + xv := x[ix] * alpha + sum := x[ix] * a[i*lda+i] + jx := kx + (i+1)*incX + jy := ky + (i+1)*incY + atmp := a[i*lda+i+1 : i*lda+n] + for _, v := range atmp { + sum += x[jx] * v + y[jy] += xv * v + jx += incX + jy += incY + } + y[iy] += alpha * sum + ix += incX + iy += incY + } + return + } + // Cases where a is lower triangular. + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + jy := ky + xv := alpha * x[i] + atmp := a[i*lda : i*lda+i] + var sum float32 + for j, v := range atmp { + sum += x[j] * v + y[jy] += xv * v + jy += incY + } + sum += x[i] * a[i*lda+i] + sum *= alpha + y[iy] += sum + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + jy := ky + xv := alpha * x[ix] + atmp := a[i*lda : i*lda+i] + var sum float32 + for _, v := range atmp { + sum += x[jx] * v + y[jy] += xv * v + jx += incX + jy += incY + } + sum += x[ix] * a[i*lda+i] + sum *= alpha + y[iy] += sum + ix += incX + iy += incY + } +} + +// Stbmv performs one of the matrix-vector operations +// +// x = A * x if tA == blas.NoTrans +// x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular band matrix with k+1 diagonals, and x is a vector. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
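+	// The last stored band element is a[(n-1)*lda+k], so a must hold at
+	// least lda*(n-1)+k+1 elements.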
+ if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } + + nonunit := d != blas.Unit + + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + u := min(1+k, n-i) + var sum float32 + atmp := a[i*lda:] + xtmp := x[i:] + for j := 1; j < u; j++ { + sum += xtmp[j] * atmp[j] + } + if nonunit { + sum += xtmp[0] * atmp[0] + } else { + sum += xtmp[0] + } + x[i] = sum + } + return + } + ix := kx + for i := 0; i < n; i++ { + u := min(1+k, n-i) + var sum float32 + atmp := a[i*lda:] + jx := incX + for j := 1; j < u; j++ { + sum += x[ix+jx] * atmp[j] + jx += incX + } + if nonunit { + sum += x[ix] * atmp[0] + } else { + sum += x[ix] + } + x[ix] = sum + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + l := max(0, k-i) + atmp := a[i*lda:] + var sum float32 + for j := l; j < k; j++ { + sum += x[i-k+j] * atmp[j] + } + if nonunit { + sum += x[i] * atmp[k] + } else { + sum += x[i] + } + x[i] = sum + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + l := max(0, k-i) + atmp := a[i*lda:] + var sum float32 + jx := l * incX + for j := l; j < k; j++ { + sum += x[ix-k*incX+jx] * atmp[j] + jx += incX + } + if nonunit { + sum += x[ix] * atmp[k] + } else { + sum += x[ix] + } + x[ix] = sum + ix -= incX + } + return + } + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + u := k + 1 + if i < u { + u = i + 1 + } + var sum float32 + for j := 1; j < u; j++ { + sum += x[i-j] * a[(i-j)*lda+j] + } + if nonunit { + sum += x[i] * a[i*lda] + } else { + sum += x[i] + } + x[i] = sum + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + u := k + 1 + if i < u { + u = i + 1 + } + var sum float32 + jx := incX + for j := 1; j < u; j++ { + sum += x[ix-jx] * a[(i-j)*lda+j] + jx += incX + } + if nonunit { + sum += x[ix] * a[i*lda] + } else { + sum += x[ix] + } + x[ix] = sum + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + u := k + if i+k >= n { + u = n - i - 1 + } + var sum float32 + for j := 0; j < u; j++ { + sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1] + } + if nonunit { + sum += x[i] * a[i*lda+k] + } else { + sum += x[i] + } + x[i] = sum + } + return + } + ix := kx + for i := 0; i < n; i++ { + u := k + if i+k >= n { + u = n - i - 1 + } + var ( + sum float32 + jx int + ) + for j := 0; j < u; j++ { + sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] + jx += incX + } + if nonunit { + sum += x[ix] * a[i*lda+k] + } else { + sum += x[ix] + } + x[ix] = sum + ix += incX + } +} + +// Stpmv performs one of the matrix-vector operations +// +// x = A * x if tA == blas.NoTrans +// x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular matrix in packed format, and x is a vector. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
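+	// x must reach index (n-1)*|incX|, which the two incX signs below
+	// express as (n-1)*incX and (1-n)*incX respectively.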
+ if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } + + nonUnit := d == blas.NonUnit + var offset int // Offset is the index of (i,i) + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + if nonUnit { + xi *= ap[offset] + } + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + for j, v := range atmp { + xi += v * xtmp[j] + } + x[i] = xi + offset += n - i + } + return + } + ix := kx + for i := 0; i < n; i++ { + xix := x[ix] + if nonUnit { + xix *= ap[offset] + } + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + for _, v := range atmp { + xix += v * x[jx] + jx += incX + } + x[ix] = xix + offset += n - i + ix += incX + } + return + } + if incX == 1 { + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + xi := x[i] + if nonUnit { + xi *= ap[offset] + } + atmp := ap[offset-i : offset] + for j, v := range atmp { + xi += v * x[j] + } + x[i] = xi + offset -= i + 1 + } + return + } + ix := kx + (n-1)*incX + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + xix := x[ix] + if nonUnit { + xix *= ap[offset] + } + atmp := ap[offset-i : offset] + jx := kx + for _, v := range atmp { + xix += v * x[jx] + jx += incX + } + x[ix] = xix + offset -= i + 1 + ix -= incX + } + return + } + // Cases where ap is transposed. + if ul == blas.Upper { + if incX == 1 { + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + xi := x[i] + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + for j, v := range atmp { + xtmp[j] += v * xi + } + if nonUnit { + x[i] *= ap[offset] + } + offset -= n - i + 1 + } + return + } + ix := kx + (n-1)*incX + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + xix := x[ix] + jx := kx + (i+1)*incX + atmp := ap[offset+1 : offset+n-i] + for _, v := range atmp { + x[jx] += v * xix + jx += incX + } + if nonUnit { + x[ix] *= ap[offset] + } + offset -= n - i + 1 + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + atmp := ap[offset-i : offset] + for j, v := range atmp { + x[j] += v * xi + } + if nonUnit { + x[i] *= ap[offset] + } + offset += i + 2 + } + return + } + ix := kx + for i := 0; i < n; i++ { + xix := x[ix] + jx := kx + atmp := ap[offset-i : offset] + for _, v := range atmp { + x[jx] += v * xix + jx += incX + } + if nonUnit { + x[ix] *= ap[offset] + } + ix += incX + offset += i + 2 + } +} + +// Stbsv solves one of the systems of equations +// +// A * x = b if tA == blas.NoTrans +// Aᵀ * x = b if tA == blas.Trans or tA == blas.ConjTrans +// +// where A is an n×n triangular band matrix with k+1 diagonals, +// and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +// +// Float32 implementations are autogenerated and not directly tested. 
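+//
+// As the indexing below shows, for blas.Upper a[i*lda+j] holds the dense
+// element A[i][i+j], while for blas.Lower a[i*lda+j] holds A[i][i-k+j]
+// with the diagonal at a[i*lda+k].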
+func (Implementation) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } + nonUnit := d == blas.NonUnit + // Form x = A^-1 x. + // Several cases below use subslices for speed improvement. + // The incX != 1 cases usually do not because incX may be negative. + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + atmp := a[i*lda+1:] + xtmp := x[i+1 : i+bands+1] + var sum float32 + for j, v := range xtmp { + sum += v * atmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + max := k + 1 + if i+max > n { + max = n - i + } + atmp := a[i*lda:] + var ( + jx int + sum float32 + ) + for j := 1; j < max; j++ { + jx += incX + sum += x[ix+jx] * atmp[j] + } + x[ix] -= sum + if nonUnit { + x[ix] /= atmp[0] + } + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + atmp := a[i*lda+k-bands:] + xtmp := x[i-bands : i] + var sum float32 + for j, v := range xtmp { + sum += v * atmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= atmp[bands] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + atmp := a[i*lda+k-bands:] + var ( + sum float32 + jx int + ) + for j := 0; j < bands; j++ { + sum += x[ix-bands*incX+jx] * atmp[j] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= atmp[bands] + } + ix += incX + } + return + } + // Cases where a is transposed. 
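+	// Transposing swaps the triangles: Aᵀ of an upper band matrix is
+	// lower banded, so the solve direction reverses relative to the
+	// NoTrans cases above.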
+ if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + var sum float32 + for j := 0; j < bands; j++ { + sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + var ( + sum float32 + jx int + ) + for j := 0; j < bands; j++ { + sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda] + } + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + var sum float32 + xtmp := x[i+1 : i+1+bands] + for j, v := range xtmp { + sum += v * a[(i+j+1)*lda+k-j-1] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda+k] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + var ( + sum float32 + jx int + ) + for j := 0; j < bands; j++ { + sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda+k] + } + ix -= incX + } +} + +// Ssbmv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where A is an n×n symmetric band matrix with k super-diagonals, x and y are +// vectors, and alpha and beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up indexes + lenX := n + lenY := n + var kx, ky int + if incX < 0 { + kx = -(lenX - 1) * incX + } + if incY < 0 { + ky = -(lenY - 1) * incY + } + + // Form y = beta * y. 
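+	// (Per BLAS convention, beta == 0 zeroes y outright instead of
+	// multiplying, so stale NaN or Inf values in y are not propagated.)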
+ if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + f32.ScalUnitary(beta, y[:n]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) + } else { + f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) + } + } + } + } + + if alpha == 0 { + return + } + + if ul == blas.Upper { + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + atmp := a[i*lda:] + tmp := alpha * x[i] + sum := tmp * atmp[0] + u := min(k, n-i-1) + jy := incY + for j := 1; j <= u; j++ { + v := atmp[j] + sum += alpha * x[i+j] * v + y[iy+jy] += tmp * v + jy += incY + } + y[iy] += sum + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + atmp := a[i*lda:] + tmp := alpha * x[ix] + sum := tmp * atmp[0] + u := min(k, n-i-1) + jx := incX + jy := incY + for j := 1; j <= u; j++ { + v := atmp[j] + sum += alpha * x[ix+jx] * v + y[iy+jy] += tmp * v + jx += incX + jy += incY + } + y[iy] += sum + ix += incX + iy += incY + } + return + } + + // Cases where a has bands below the diagonal. + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + l := max(0, k-i) + tmp := alpha * x[i] + jy := l * incY + atmp := a[i*lda:] + for j := l; j < k; j++ { + v := atmp[j] + y[iy] += alpha * v * x[i-k+j] + y[iy-k*incY+jy] += tmp * v + jy += incY + } + y[iy] += tmp * atmp[k] + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + l := max(0, k-i) + tmp := alpha * x[ix] + jx := l * incX + jy := l * incY + atmp := a[i*lda:] + for j := l; j < k; j++ { + v := atmp[j] + y[iy] += alpha * v * x[ix-k*incX+jx] + y[iy-k*incY+jy] += tmp * v + jx += incX + jy += incY + } + y[iy] += tmp * atmp[k] + ix += incX + iy += incY + } +} + +// Ssyr performs the symmetric rank-one update +// +// A += alpha * x * xᵀ +// +// where A is an n×n symmetric matrix, and x is a vector. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if len(a) < lda*(n-1)+n { + panic(shortA) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + lenX := n + var kx int + if incX < 0 { + kx = -(lenX - 1) * incX + } + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + tmp := x[i] * alpha + if tmp != 0 { + atmp := a[i*lda+i : i*lda+n] + xtmp := x[i:n] + for j, v := range xtmp { + atmp[j] += v * tmp + } + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + tmp := x[ix] * alpha + if tmp != 0 { + jx := ix + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += x[jx] * tmp + jx += incX + } + } + ix += incX + } + return + } + // Cases where a is lower triangular. 
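+	// Only the leading i+1 elements of row i, up to and including the
+	// diagonal, are updated.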
+ if incX == 1 { + for i := 0; i < n; i++ { + tmp := x[i] * alpha + if tmp != 0 { + atmp := a[i*lda:] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += tmp * v + } + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + tmp := x[ix] * alpha + if tmp != 0 { + atmp := a[i*lda:] + jx := kx + for j := 0; j < i+1; j++ { + atmp[j] += tmp * x[jx] + jx += incX + } + } + ix += incX + } +} + +// Ssyr2 performs the symmetric rank-two update +// +// A += alpha * x * yᵀ + alpha * y * xᵀ +// +// where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(a) < lda*(n-1)+n { + panic(shortA) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + var ky, kx int + if incY < 0 { + ky = -(n - 1) * incY + } + if incX < 0 { + kx = -(n - 1) * incX + } + if ul == blas.Upper { + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + xi := x[i] + yi := y[i] + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += alpha * (xi*y[j] + x[j]*yi) + } + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + i*incX + jy := ky + i*incY + xi := x[ix] + yi := y[iy] + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + } + return + } + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + xi := x[i] + yi := y[i] + atmp := a[i*lda:] + for j := 0; j <= i; j++ { + atmp[j] += alpha * (xi*y[j] + x[j]*yi) + } + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + jy := ky + xi := x[ix] + yi := y[iy] + atmp := a[i*lda:] + for j := 0; j <= i; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + } +} + +// Stpsv solves one of the systems of equations +// +// A * x = b if tA == blas.NoTrans +// Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular matrix in packed format, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. 
+ if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } + + nonUnit := d == blas.NonUnit + var offset int // Offset is the index of (i,i) + if tA == blas.NoTrans { + if ul == blas.Upper { + offset = n*(n+1)/2 - 1 + if incX == 1 { + for i := n - 1; i >= 0; i-- { + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + var sum float32 + for j, v := range atmp { + sum += v * xtmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= ap[offset] + } + offset -= n - i + 1 + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + var sum float32 + for _, v := range atmp { + sum += v * x[jx] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= ap[offset] + } + ix -= incX + offset -= n - i + 1 + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset-i : offset] + var sum float32 + for j, v := range atmp { + sum += v * x[j] + } + x[i] -= sum + if nonUnit { + x[i] /= ap[offset] + } + offset += i + 2 + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + atmp := ap[offset-i : offset] + var sum float32 + for _, v := range atmp { + sum += v * x[jx] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= ap[offset] + } + ix += incX + offset += i + 2 + } + return + } + // Cases where ap is transposed. + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if nonUnit { + x[i] /= ap[offset] + } + xi := x[i] + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + for j, v := range atmp { + xtmp[j] -= v * xi + } + offset += n - i + } + return + } + ix := kx + for i := 0; i < n; i++ { + if nonUnit { + x[ix] /= ap[offset] + } + xix := x[ix] + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + for _, v := range atmp { + x[jx] -= v * xix + jx += incX + } + ix += incX + offset += n - i + } + return + } + if incX == 1 { + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[i] /= ap[offset] + } + xi := x[i] + atmp := ap[offset-i : offset] + for j, v := range atmp { + x[j] -= v * xi + } + offset -= i + 1 + } + return + } + ix := kx + (n-1)*incX + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[ix] /= ap[offset] + } + xix := x[ix] + atmp := ap[offset-i : offset] + jx := kx + for _, v := range atmp { + x[jx] -= v * xix + jx += incX + } + ix -= incX + offset -= i + 1 + } +} + +// Sspmv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha and beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sspmv(ul blas.Uplo, n int, alpha float32, ap []float32, x []float32, incX int, beta float32, y []float32, incY int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
+ if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up start points + var kx, ky int + if incX < 0 { + kx = -(n - 1) * incX + } + if incY < 0 { + ky = -(n - 1) * incY + } + + // Form y = beta * y. + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + f32.ScalUnitary(beta, y[:n]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) + } else { + f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) + } + } + } + } + + if alpha == 0 { + return + } + + if n == 1 { + y[0] += alpha * ap[0] * x[0] + return + } + var offset int // Offset is the index of (i,i). + if ul == blas.Upper { + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + xv := x[i] * alpha + sum := ap[offset] * x[i] + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + jy := ky + (i+1)*incY + for j, v := range atmp { + sum += v * xtmp[j] + y[jy] += v * xv + jy += incY + } + y[iy] += alpha * sum + iy += incY + offset += n - i + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + xv := x[ix] * alpha + sum := ap[offset] * x[ix] + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + jy := ky + (i+1)*incY + for _, v := range atmp { + sum += v * x[jx] + y[jy] += v * xv + jx += incX + jy += incY + } + y[iy] += alpha * sum + ix += incX + iy += incY + offset += n - i + } + return + } + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + xv := x[i] * alpha + atmp := ap[offset-i : offset] + jy := ky + var sum float32 + for j, v := range atmp { + sum += v * x[j] + y[jy] += v * xv + jy += incY + } + sum += ap[offset] * x[i] + y[iy] += alpha * sum + iy += incY + offset += i + 2 + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + xv := x[ix] * alpha + atmp := ap[offset-i : offset] + jx := kx + jy := ky + var sum float32 + for _, v := range atmp { + sum += v * x[jx] + y[jy] += v * xv + jx += incX + jy += incY + } + + sum += ap[offset] * x[ix] + y[iy] += alpha * sum + ix += incX + iy += incY + offset += i + 2 + } +} + +// Sspr performs the symmetric rank-one operation +// +// A += alpha * x * xᵀ +// +// where A is an n×n symmetric matrix in packed format, x is a vector, and +// alpha is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, ap []float32) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + lenX := n + var kx int + if incX < 0 { + kx = -(lenX - 1) * incX + } + var offset int // Offset is the index of (i,i). 
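+ // Editor's note: ap stores the referenced triangle packed row by row, so
+ // the diagonal index advances by n-i per row in the upper case (rows
+ // shrink) and by i+2 per row in the lower case (rows grow), matching the
+ // offset updates below.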
+ if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset:] + xv := alpha * x[i] + xtmp := x[i:n] + for j, v := range xtmp { + atmp[j] += xv * v + } + offset += n - i + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + i*incX + atmp := ap[offset:] + xv := alpha * x[ix] + for j := 0; j < n-i; j++ { + atmp[j] += xv * x[jx] + jx += incX + } + ix += incX + offset += n - i + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset-i:] + xv := alpha * x[i] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += xv * v + } + offset += i + 2 + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + atmp := ap[offset-i:] + xv := alpha * x[ix] + for j := 0; j <= i; j++ { + atmp[j] += xv * x[jx] + jx += incX + } + ix += incX + offset += i + 2 + } +} + +// Sspr2 performs the symmetric rank-2 update +// +// A += alpha * x * yᵀ + alpha * y * xᵀ +// +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, ap []float32) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + var ky, kx int + if incY < 0 { + ky = -(n - 1) * incY + } + if incX < 0 { + kx = -(n - 1) * incX + } + var offset int // Offset is the index of (i,i). + if ul == blas.Upper { + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset:] + xi := x[i] + yi := y[i] + xtmp := x[i:n] + ytmp := y[i:n] + for j, v := range xtmp { + atmp[j] += alpha * (xi*ytmp[j] + v*yi) + } + offset += n - i + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + i*incX + jy := ky + i*incY + atmp := ap[offset:] + xi := x[ix] + yi := y[iy] + for j := 0; j < n-i; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + offset += n - i + } + return + } + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset-i:] + xi := x[i] + yi := y[i] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += alpha * (xi*y[j] + v*yi) + } + offset += i + 2 + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + jy := ky + atmp := ap[offset-i:] + for j := 0; j <= i; j++ { + atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy]) + jx += incX + jy += incY + } + ix += incX + iy += incY + offset += i + 2 + } +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level2float64.go b/vendor/gonum.org/v1/gonum/blas/gonum/level2float64.go new file mode 100644 index 0000000000..19b9c7e1c3 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level2float64.go @@ -0,0 +1,2366 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/f64" +) + +var _ blas.Float64Level2 = Implementation{} + +// Dger performs the rank-one operation +// +// A += alpha * x * yᵀ +// +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(a) < lda*(m-1)+n { + panic(shortA) + } + + // Quick return if possible. + if alpha == 0 { + return + } + f64.Ger(uintptr(m), uintptr(n), + alpha, + x, uintptr(incX), + y, uintptr(incY), + a, uintptr(lda)) +} + +// Dgbmv performs one of the matrix-vector operations +// +// y = alpha * A * x + beta * y if tA == blas.NoTrans +// y = alpha * Aᵀ * x + beta * y if tA == blas.Trans or blas.ConjTrans +// +// where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals, +// x and y are vectors, and alpha and beta are scalars. +func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if kL < 0 { + panic(kLLT0) + } + if kU < 0 { + panic(kULT0) + } + if lda < kL+kU+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 { + panic(shortA) + } + lenX := m + lenY := n + if tA == blas.NoTrans { + lenX = n + lenY = m + } + if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + var kx, ky int + if incX < 0 { + kx = -(lenX - 1) * incX + } + if incY < 0 { + ky = -(lenY - 1) * incY + } + + // Form y = beta * y. + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:lenY] { + y[i] = 0 + } + } else { + f64.ScalUnitary(beta, y[:lenY]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < lenY; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + f64.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) + } else { + f64.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) + } + } + } + } + + if alpha == 0 { + return + } + + // i and j are indices of the compacted banded matrix. 
+ // off is the offset into the dense matrix (off + j = densej) + nCol := kU + 1 + kL + if tA == blas.NoTrans { + iy := ky + if incX == 1 { + for i := 0; i < min(m, n+kL); i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + xtmp := x[off : off+u-l] + var sum float64 + for j, v := range atmp { + sum += xtmp[j] * v + } + y[iy] += sum * alpha + iy += incY + } + return + } + for i := 0; i < min(m, n+kL); i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + jx := kx + var sum float64 + for _, v := range atmp { + sum += x[off*incX+jx] * v + jx += incX + } + y[iy] += sum * alpha + iy += incY + } + return + } + if incX == 1 { + for i := 0; i < min(m, n+kL); i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + tmp := alpha * x[i] + jy := ky + for _, v := range atmp { + y[jy+off*incY] += tmp * v + jy += incY + } + } + return + } + ix := kx + for i := 0; i < min(m, n+kL); i++ { + l := max(0, kL-i) + u := min(nCol, n+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + tmp := alpha * x[ix] + jy := ky + for _, v := range atmp { + y[jy+off*incY] += tmp * v + jy += incY + } + ix += incX + } +} + +// Dgemv computes +// +// y = alpha * A * x + beta * y if tA = blas.NoTrans +// y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans +// +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. +func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + // Set up indexes + lenX := m + lenY := n + if tA == blas.NoTrans { + lenX = n + lenY = m + } + + // Quick return if possible + if m == 0 || n == 0 { + return + } + + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic(shortX) + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic(shortY) + } + if len(a) < lda*(m-1)+n { + panic(shortA) + } + + // Quick return if possible + if alpha == 0 && beta == 1 { + return + } + + if alpha == 0 { + // First form y = beta * y + if incY > 0 { + Implementation{}.Dscal(lenY, beta, y, incY) + } else { + Implementation{}.Dscal(lenY, beta, y, -incY) + } + return + } + + // Form y = alpha * A * x + y + if tA == blas.NoTrans { + f64.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) + return + } + // Cases where a is transposed. + f64.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) +} + +// Dtrmv performs one of the matrix-vector operations +// +// x = A * x if tA == blas.NoTrans +// x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular matrix, and x is a vector. 
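+//
+// A minimal usage sketch follows (an editor's illustration with hypothetical
+// values; it is not part of the upstream gonum sources). A is supplied in
+// row-major order and only its upper triangle is referenced here:
+//
+//	impl := Implementation{}
+//	a := []float64{
+//		2, 1,
+//		0, 3, // 2×2 upper-triangular matrix, lda = 2
+//	}
+//	x := []float64{1, 1}
+//	impl.Dtrmv(blas.Upper, blas.NoTrans, blas.NonUnit, 2, a, 2, x, 1)
+//	// x is now {2*1 + 1*1, 3*1} = {3, 3}.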
+func (Implementation) Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + nonUnit := d != blas.Unit + if n == 1 { + if nonUnit { + x[0] *= a[0] + } + return + } + var kx int + if incX <= 0 { + kx = -(n - 1) * incX + } + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + ilda := i * lda + var tmp float64 + if nonUnit { + tmp = a[ilda+i] * x[i] + } else { + tmp = x[i] + } + x[i] = tmp + f64.DotUnitary(a[ilda+i+1:ilda+n], x[i+1:n]) + } + return + } + ix := kx + for i := 0; i < n; i++ { + ilda := i * lda + var tmp float64 + if nonUnit { + tmp = a[ilda+i] * x[ix] + } else { + tmp = x[ix] + } + x[ix] = tmp + f64.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + ilda := i * lda + var tmp float64 + if nonUnit { + tmp += a[ilda+i] * x[i] + } else { + tmp = x[i] + } + x[i] = tmp + f64.DotUnitary(a[ilda:ilda+i], x[:i]) + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + ilda := i * lda + var tmp float64 + if nonUnit { + tmp = a[ilda+i] * x[ix] + } else { + tmp = x[ix] + } + x[ix] = tmp + f64.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) + ix -= incX + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + ilda := i * lda + xi := x[i] + f64.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n]) + if nonUnit { + x[i] *= a[ilda+i] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + ilda := i * lda + xi := x[ix] + f64.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX)) + if nonUnit { + x[ix] *= a[ilda+i] + } + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + ilda := i * lda + xi := x[i] + f64.AxpyUnitary(xi, a[ilda:ilda+i], x[:i]) + if nonUnit { + x[i] *= a[i*lda+i] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + ilda := i * lda + xi := x[ix] + f64.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) + if nonUnit { + x[ix] *= a[ilda+i] + } + ix += incX + } +} + +// Dtrsv solves one of the systems of equations +// +// A * x = b if tA == blas.NoTrans +// Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular matrix, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. 
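+//
+// A minimal usage sketch (an editor's illustration with hypothetical values,
+// not part of the upstream gonum sources), solving the system produced by
+// the Dtrmv example above:
+//
+//	impl := Implementation{}
+//	a := []float64{
+//		2, 1,
+//		0, 3, // 2×2 upper-triangular matrix, lda = 2
+//	}
+//	x := []float64{3, 3} // x holds b on entry
+//	impl.Dtrsv(blas.Upper, blas.NoTrans, blas.NonUnit, 2, a, 2, x, 1)
+//	// x is now {1, 1}: back substitution gives x[1] = 3/3, then
+//	// x[0] = (3 - 1*x[1])/2.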
+func (Implementation) Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + if n == 1 { + if d == blas.NonUnit { + x[0] /= a[0] + } + return + } + + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } + nonUnit := d == blas.NonUnit + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + var sum float64 + atmp := a[i*lda+i+1 : i*lda+n] + for j, v := range atmp { + jv := i + j + 1 + sum += x[jv] * v + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda+i] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + var sum float64 + jx := ix + incX + atmp := a[i*lda+i+1 : i*lda+n] + for _, v := range atmp { + sum += x[jx] * v + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda+i] + } + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + var sum float64 + atmp := a[i*lda : i*lda+i] + for j, v := range atmp { + sum += x[j] * v + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda+i] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + var sum float64 + atmp := a[i*lda : i*lda+i] + for _, v := range atmp { + sum += x[jx] * v + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda+i] + } + ix += incX + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if nonUnit { + x[i] /= a[i*lda+i] + } + xi := x[i] + atmp := a[i*lda+i+1 : i*lda+n] + for j, v := range atmp { + jv := j + i + 1 + x[jv] -= v * xi + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + if nonUnit { + x[ix] /= a[i*lda+i] + } + xi := x[ix] + jx := kx + (i+1)*incX + atmp := a[i*lda+i+1 : i*lda+n] + for _, v := range atmp { + x[jx] -= v * xi + jx += incX + } + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[i] /= a[i*lda+i] + } + xi := x[i] + atmp := a[i*lda : i*lda+i] + for j, v := range atmp { + x[j] -= v * xi + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[ix] /= a[i*lda+i] + } + xi := x[ix] + jx := kx + atmp := a[i*lda : i*lda+i] + for _, v := range atmp { + x[jx] -= v * xi + jx += incX + } + ix -= incX + } +} + +// Dsymv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where A is an n×n symmetric matrix, x and y are vectors, and alpha and +// beta are scalars. +func (Implementation) Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. 
+ if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+n { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up start points + var kx, ky int + if incX < 0 { + kx = -(n - 1) * incX + } + if incY < 0 { + ky = -(n - 1) * incY + } + + // Form y = beta * y + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + f64.ScalUnitary(beta, y[:n]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) + } else { + f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) + } + } + } + } + + if alpha == 0 { + return + } + + if n == 1 { + y[0] += alpha * a[0] * x[0] + return + } + + if ul == blas.Upper { + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + xv := x[i] * alpha + sum := x[i] * a[i*lda+i] + jy := ky + (i+1)*incY + atmp := a[i*lda+i+1 : i*lda+n] + for j, v := range atmp { + jp := j + i + 1 + sum += x[jp] * v + y[jy] += xv * v + jy += incY + } + y[iy] += alpha * sum + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + xv := x[ix] * alpha + sum := x[ix] * a[i*lda+i] + jx := kx + (i+1)*incX + jy := ky + (i+1)*incY + atmp := a[i*lda+i+1 : i*lda+n] + for _, v := range atmp { + sum += x[jx] * v + y[jy] += xv * v + jx += incX + jy += incY + } + y[iy] += alpha * sum + ix += incX + iy += incY + } + return + } + // Cases where a is lower triangular. + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + jy := ky + xv := alpha * x[i] + atmp := a[i*lda : i*lda+i] + var sum float64 + for j, v := range atmp { + sum += x[j] * v + y[jy] += xv * v + jy += incY + } + sum += x[i] * a[i*lda+i] + sum *= alpha + y[iy] += sum + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + jy := ky + xv := alpha * x[ix] + atmp := a[i*lda : i*lda+i] + var sum float64 + for _, v := range atmp { + sum += x[jx] * v + y[jy] += xv * v + jx += incX + jy += incY + } + sum += x[ix] * a[i*lda+i] + sum *= alpha + y[iy] += sum + ix += incX + iy += incY + } +} + +// Dtbmv performs one of the matrix-vector operations +// +// x = A * x if tA == blas.NoTrans +// x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular band matrix with k+1 diagonals, and x is a vector. +func (Implementation) Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
+ if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } + + nonunit := d != blas.Unit + + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + u := min(1+k, n-i) + var sum float64 + atmp := a[i*lda:] + xtmp := x[i:] + for j := 1; j < u; j++ { + sum += xtmp[j] * atmp[j] + } + if nonunit { + sum += xtmp[0] * atmp[0] + } else { + sum += xtmp[0] + } + x[i] = sum + } + return + } + ix := kx + for i := 0; i < n; i++ { + u := min(1+k, n-i) + var sum float64 + atmp := a[i*lda:] + jx := incX + for j := 1; j < u; j++ { + sum += x[ix+jx] * atmp[j] + jx += incX + } + if nonunit { + sum += x[ix] * atmp[0] + } else { + sum += x[ix] + } + x[ix] = sum + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + l := max(0, k-i) + atmp := a[i*lda:] + var sum float64 + for j := l; j < k; j++ { + sum += x[i-k+j] * atmp[j] + } + if nonunit { + sum += x[i] * atmp[k] + } else { + sum += x[i] + } + x[i] = sum + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + l := max(0, k-i) + atmp := a[i*lda:] + var sum float64 + jx := l * incX + for j := l; j < k; j++ { + sum += x[ix-k*incX+jx] * atmp[j] + jx += incX + } + if nonunit { + sum += x[ix] * atmp[k] + } else { + sum += x[ix] + } + x[ix] = sum + ix -= incX + } + return + } + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + u := k + 1 + if i < u { + u = i + 1 + } + var sum float64 + for j := 1; j < u; j++ { + sum += x[i-j] * a[(i-j)*lda+j] + } + if nonunit { + sum += x[i] * a[i*lda] + } else { + sum += x[i] + } + x[i] = sum + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + u := k + 1 + if i < u { + u = i + 1 + } + var sum float64 + jx := incX + for j := 1; j < u; j++ { + sum += x[ix-jx] * a[(i-j)*lda+j] + jx += incX + } + if nonunit { + sum += x[ix] * a[i*lda] + } else { + sum += x[ix] + } + x[ix] = sum + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + u := k + if i+k >= n { + u = n - i - 1 + } + var sum float64 + for j := 0; j < u; j++ { + sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1] + } + if nonunit { + sum += x[i] * a[i*lda+k] + } else { + sum += x[i] + } + x[i] = sum + } + return + } + ix := kx + for i := 0; i < n; i++ { + u := k + if i+k >= n { + u = n - i - 1 + } + var ( + sum float64 + jx int + ) + for j := 0; j < u; j++ { + sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] + jx += incX + } + if nonunit { + sum += x[ix] * a[i*lda+k] + } else { + sum += x[ix] + } + x[ix] = sum + ix += incX + } +} + +// Dtpmv performs one of the matrix-vector operations +// +// x = A * x if tA == blas.NoTrans +// x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular matrix in packed format, and x is a vector. +func (Implementation) Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
+ if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } + + nonUnit := d == blas.NonUnit + var offset int // Offset is the index of (i,i) + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + if nonUnit { + xi *= ap[offset] + } + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + for j, v := range atmp { + xi += v * xtmp[j] + } + x[i] = xi + offset += n - i + } + return + } + ix := kx + for i := 0; i < n; i++ { + xix := x[ix] + if nonUnit { + xix *= ap[offset] + } + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + for _, v := range atmp { + xix += v * x[jx] + jx += incX + } + x[ix] = xix + offset += n - i + ix += incX + } + return + } + if incX == 1 { + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + xi := x[i] + if nonUnit { + xi *= ap[offset] + } + atmp := ap[offset-i : offset] + for j, v := range atmp { + xi += v * x[j] + } + x[i] = xi + offset -= i + 1 + } + return + } + ix := kx + (n-1)*incX + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + xix := x[ix] + if nonUnit { + xix *= ap[offset] + } + atmp := ap[offset-i : offset] + jx := kx + for _, v := range atmp { + xix += v * x[jx] + jx += incX + } + x[ix] = xix + offset -= i + 1 + ix -= incX + } + return + } + // Cases where ap is transposed. + if ul == blas.Upper { + if incX == 1 { + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + xi := x[i] + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + for j, v := range atmp { + xtmp[j] += v * xi + } + if nonUnit { + x[i] *= ap[offset] + } + offset -= n - i + 1 + } + return + } + ix := kx + (n-1)*incX + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + xix := x[ix] + jx := kx + (i+1)*incX + atmp := ap[offset+1 : offset+n-i] + for _, v := range atmp { + x[jx] += v * xix + jx += incX + } + if nonUnit { + x[ix] *= ap[offset] + } + offset -= n - i + 1 + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + atmp := ap[offset-i : offset] + for j, v := range atmp { + x[j] += v * xi + } + if nonUnit { + x[i] *= ap[offset] + } + offset += i + 2 + } + return + } + ix := kx + for i := 0; i < n; i++ { + xix := x[ix] + jx := kx + atmp := ap[offset-i : offset] + for _, v := range atmp { + x[jx] += v * xix + jx += incX + } + if nonUnit { + x[ix] *= ap[offset] + } + ix += incX + offset += i + 2 + } +} + +// Dtbsv solves one of the systems of equations +// +// A * x = b if tA == blas.NoTrans +// Aᵀ * x = b if tA == blas.Trans or tA == blas.ConjTrans +// +// where A is an n×n triangular band matrix with k+1 diagonals, +// and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. 
+ if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } + nonUnit := d == blas.NonUnit + // Form x = A^-1 x. + // Several cases below use subslices for speed improvement. + // The incX != 1 cases usually do not because incX may be negative. + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + atmp := a[i*lda+1:] + xtmp := x[i+1 : i+bands+1] + var sum float64 + for j, v := range xtmp { + sum += v * atmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + max := k + 1 + if i+max > n { + max = n - i + } + atmp := a[i*lda:] + var ( + jx int + sum float64 + ) + for j := 1; j < max; j++ { + jx += incX + sum += x[ix+jx] * atmp[j] + } + x[ix] -= sum + if nonUnit { + x[ix] /= atmp[0] + } + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + atmp := a[i*lda+k-bands:] + xtmp := x[i-bands : i] + var sum float64 + for j, v := range xtmp { + sum += v * atmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= atmp[bands] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + atmp := a[i*lda+k-bands:] + var ( + sum float64 + jx int + ) + for j := 0; j < bands; j++ { + sum += x[ix-bands*incX+jx] * atmp[j] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= atmp[bands] + } + ix += incX + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + var sum float64 + for j := 0; j < bands; j++ { + sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + var ( + sum float64 + jx int + ) + for j := 0; j < bands; j++ { + sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda] + } + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + var sum float64 + xtmp := x[i+1 : i+1+bands] + for j, v := range xtmp { + sum += v * a[(i+j+1)*lda+k-j-1] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda+k] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + var ( + sum float64 + jx int + ) + for j := 0; j < bands; j++ { + sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda+k] + } + ix -= incX + } +} + +// Dsbmv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where A is an n×n symmetric band matrix with k super-diagonals, x and y are +// vectors, and alpha and beta are scalars. 
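+//
+// A minimal usage sketch (an editor's illustration with hypothetical values,
+// not part of the upstream gonum sources). A symmetric tridiagonal matrix is
+// the k = 1 case of the band storage scheme:
+//
+//	impl := Implementation{}
+//	// A has diagonal {2, 2, 2} and off-diagonals {1, 1}; each row of the
+//	// upper band storage holds {A[i,i], A[i,i+1]}, and the final 0 is padding.
+//	a := []float64{2, 1, 2, 1, 2, 0}
+//	x := []float64{1, 1, 1}
+//	y := make([]float64, 3)
+//	impl.Dsbmv(blas.Upper, 3, 1, 1, a, 2, x, 1, 0, y, 1)
+//	// y is now {3, 4, 3} = A * x.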
+func (Implementation) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(n-1)+k+1 { + panic(shortA) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up indexes + lenX := n + lenY := n + var kx, ky int + if incX < 0 { + kx = -(lenX - 1) * incX + } + if incY < 0 { + ky = -(lenY - 1) * incY + } + + // Form y = beta * y. + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + f64.ScalUnitary(beta, y[:n]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) + } else { + f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) + } + } + } + } + + if alpha == 0 { + return + } + + if ul == blas.Upper { + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + atmp := a[i*lda:] + tmp := alpha * x[i] + sum := tmp * atmp[0] + u := min(k, n-i-1) + jy := incY + for j := 1; j <= u; j++ { + v := atmp[j] + sum += alpha * x[i+j] * v + y[iy+jy] += tmp * v + jy += incY + } + y[iy] += sum + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + atmp := a[i*lda:] + tmp := alpha * x[ix] + sum := tmp * atmp[0] + u := min(k, n-i-1) + jx := incX + jy := incY + for j := 1; j <= u; j++ { + v := atmp[j] + sum += alpha * x[ix+jx] * v + y[iy+jy] += tmp * v + jx += incX + jy += incY + } + y[iy] += sum + ix += incX + iy += incY + } + return + } + + // Cases where a has bands below the diagonal. + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + l := max(0, k-i) + tmp := alpha * x[i] + jy := l * incY + atmp := a[i*lda:] + for j := l; j < k; j++ { + v := atmp[j] + y[iy] += alpha * v * x[i-k+j] + y[iy-k*incY+jy] += tmp * v + jy += incY + } + y[iy] += tmp * atmp[k] + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + l := max(0, k-i) + tmp := alpha * x[ix] + jx := l * incX + jy := l * incY + atmp := a[i*lda:] + for j := l; j < k; j++ { + v := atmp[j] + y[iy] += alpha * v * x[ix-k*incX+jx] + y[iy-k*incY+jy] += tmp * v + jx += incX + jy += incY + } + y[iy] += tmp * atmp[k] + ix += incX + iy += incY + } +} + +// Dsyr performs the symmetric rank-one update +// +// A += alpha * x * xᵀ +// +// where A is an n×n symmetric matrix, and x is a vector. +func (Implementation) Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
+ if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if len(a) < lda*(n-1)+n { + panic(shortA) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + lenX := n + var kx int + if incX < 0 { + kx = -(lenX - 1) * incX + } + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + tmp := x[i] * alpha + if tmp != 0 { + atmp := a[i*lda+i : i*lda+n] + xtmp := x[i:n] + for j, v := range xtmp { + atmp[j] += v * tmp + } + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + tmp := x[ix] * alpha + if tmp != 0 { + jx := ix + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += x[jx] * tmp + jx += incX + } + } + ix += incX + } + return + } + // Cases where a is lower triangular. + if incX == 1 { + for i := 0; i < n; i++ { + tmp := x[i] * alpha + if tmp != 0 { + atmp := a[i*lda:] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += tmp * v + } + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + tmp := x[ix] * alpha + if tmp != 0 { + atmp := a[i*lda:] + jx := kx + for j := 0; j < i+1; j++ { + atmp[j] += tmp * x[jx] + jx += incX + } + } + ix += incX + } +} + +// Dsyr2 performs the symmetric rank-two update +// +// A += alpha * x * yᵀ + alpha * y * xᵀ +// +// where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar. +func (Implementation) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(a) < lda*(n-1)+n { + panic(shortA) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + var ky, kx int + if incY < 0 { + ky = -(n - 1) * incY + } + if incX < 0 { + kx = -(n - 1) * incX + } + if ul == blas.Upper { + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + xi := x[i] + yi := y[i] + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += alpha * (xi*y[j] + x[j]*yi) + } + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + i*incX + jy := ky + i*incY + xi := x[ix] + yi := y[iy] + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + } + return + } + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + xi := x[i] + yi := y[i] + atmp := a[i*lda:] + for j := 0; j <= i; j++ { + atmp[j] += alpha * (xi*y[j] + x[j]*yi) + } + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + jy := ky + xi := x[ix] + yi := y[iy] + atmp := a[i*lda:] + for j := 0; j <= i; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + } +} + +// Dtpsv solves one of the systems of equations +// +// A * x = b if tA == blas.NoTrans +// Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular matrix in packed format, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. 
+// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } + + nonUnit := d == blas.NonUnit + var offset int // Offset is the index of (i,i) + if tA == blas.NoTrans { + if ul == blas.Upper { + offset = n*(n+1)/2 - 1 + if incX == 1 { + for i := n - 1; i >= 0; i-- { + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + var sum float64 + for j, v := range atmp { + sum += v * xtmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= ap[offset] + } + offset -= n - i + 1 + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + var sum float64 + for _, v := range atmp { + sum += v * x[jx] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= ap[offset] + } + ix -= incX + offset -= n - i + 1 + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset-i : offset] + var sum float64 + for j, v := range atmp { + sum += v * x[j] + } + x[i] -= sum + if nonUnit { + x[i] /= ap[offset] + } + offset += i + 2 + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + atmp := ap[offset-i : offset] + var sum float64 + for _, v := range atmp { + sum += v * x[jx] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= ap[offset] + } + ix += incX + offset += i + 2 + } + return + } + // Cases where ap is transposed. + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if nonUnit { + x[i] /= ap[offset] + } + xi := x[i] + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + for j, v := range atmp { + xtmp[j] -= v * xi + } + offset += n - i + } + return + } + ix := kx + for i := 0; i < n; i++ { + if nonUnit { + x[ix] /= ap[offset] + } + xix := x[ix] + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + for _, v := range atmp { + x[jx] -= v * xix + jx += incX + } + ix += incX + offset += n - i + } + return + } + if incX == 1 { + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[i] /= ap[offset] + } + xi := x[i] + atmp := ap[offset-i : offset] + for j, v := range atmp { + x[j] -= v * xi + } + offset -= i + 1 + } + return + } + ix := kx + (n-1)*incX + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[ix] /= ap[offset] + } + xix := x[ix] + atmp := ap[offset-i : offset] + jx := kx + for _, v := range atmp { + x[jx] -= v * xix + jx += incX + } + ix -= incX + offset -= i + 1 + } +} + +// Dspmv performs the matrix-vector operation +// +// y = alpha * A * x + beta * y +// +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha and beta are scalars. 
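+//
+// A minimal usage sketch (an editor's illustration with hypothetical values,
+// not part of the upstream gonum sources). With blas.Upper, ap lists the
+// rows of the upper triangle one after another:
+//
+//	impl := Implementation{}
+//	// Symmetric A = [{2, 1}, {1, 3}] packed as {A[0,0], A[0,1], A[1,1]}.
+//	ap := []float64{2, 1, 3}
+//	x := []float64{1, 1}
+//	y := make([]float64, 2)
+//	impl.Dspmv(blas.Upper, 2, 1, ap, x, 1, 0, y, 1)
+//	// y is now {2 + 1, 1 + 3} = {3, 4} = A * x.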
+func (Implementation) Dspmv(ul blas.Uplo, n int, alpha float64, ap []float64, x []float64, incX int, beta float64, y []float64, incY int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + // Set up start points + var kx, ky int + if incX < 0 { + kx = -(n - 1) * incX + } + if incY < 0 { + ky = -(n - 1) * incY + } + + // Form y = beta * y. + if beta != 1 { + if incY == 1 { + if beta == 0 { + for i := range y[:n] { + y[i] = 0 + } + } else { + f64.ScalUnitary(beta, y[:n]) + } + } else { + iy := ky + if beta == 0 { + for i := 0; i < n; i++ { + y[iy] = 0 + iy += incY + } + } else { + if incY > 0 { + f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) + } else { + f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) + } + } + } + } + + if alpha == 0 { + return + } + + if n == 1 { + y[0] += alpha * ap[0] * x[0] + return + } + var offset int // Offset is the index of (i,i). + if ul == blas.Upper { + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + xv := x[i] * alpha + sum := ap[offset] * x[i] + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + jy := ky + (i+1)*incY + for j, v := range atmp { + sum += v * xtmp[j] + y[jy] += v * xv + jy += incY + } + y[iy] += alpha * sum + iy += incY + offset += n - i + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + xv := x[ix] * alpha + sum := ap[offset] * x[ix] + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + jy := ky + (i+1)*incY + for _, v := range atmp { + sum += v * x[jx] + y[jy] += v * xv + jx += incX + jy += incY + } + y[iy] += alpha * sum + ix += incX + iy += incY + offset += n - i + } + return + } + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + xv := x[i] * alpha + atmp := ap[offset-i : offset] + jy := ky + var sum float64 + for j, v := range atmp { + sum += v * x[j] + y[jy] += v * xv + jy += incY + } + sum += ap[offset] * x[i] + y[iy] += alpha * sum + iy += incY + offset += i + 2 + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + xv := x[ix] * alpha + atmp := ap[offset-i : offset] + jx := kx + jy := ky + var sum float64 + for _, v := range atmp { + sum += v * x[jx] + y[jy] += v * xv + jx += incX + jy += incY + } + + sum += ap[offset] * x[ix] + y[iy] += alpha * sum + ix += incX + iy += incY + offset += i + 2 + } +} + +// Dspr performs the symmetric rank-one operation +// +// A += alpha * x * xᵀ +// +// where A is an n×n symmetric matrix in packed format, x is a vector, and +// alpha is a scalar. +func (Implementation) Dspr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, ap []float64) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
+ if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + lenX := n + var kx int + if incX < 0 { + kx = -(lenX - 1) * incX + } + var offset int // Offset is the index of (i,i). + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset:] + xv := alpha * x[i] + xtmp := x[i:n] + for j, v := range xtmp { + atmp[j] += xv * v + } + offset += n - i + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + i*incX + atmp := ap[offset:] + xv := alpha * x[ix] + for j := 0; j < n-i; j++ { + atmp[j] += xv * x[jx] + jx += incX + } + ix += incX + offset += n - i + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset-i:] + xv := alpha * x[i] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += xv * v + } + offset += i + 2 + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + atmp := ap[offset-i:] + xv := alpha * x[ix] + for j := 0; j <= i; j++ { + atmp[j] += xv * x[jx] + jx += incX + } + ix += incX + offset += i + 2 + } +} + +// Dspr2 performs the symmetric rank-2 update +// +// A += alpha * x * yᵀ + alpha * y * xᵀ +// +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha is a scalar. +func (Implementation) Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, ap []float64) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { + panic(shortX) + } + if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { + panic(shortY) + } + if len(ap) < n*(n+1)/2 { + panic(shortAP) + } + + // Quick return if possible. + if alpha == 0 { + return + } + + var ky, kx int + if incY < 0 { + ky = -(n - 1) * incY + } + if incX < 0 { + kx = -(n - 1) * incX + } + var offset int // Offset is the index of (i,i). 
+ if ul == blas.Upper { + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset:] + xi := x[i] + yi := y[i] + xtmp := x[i:n] + ytmp := y[i:n] + for j, v := range xtmp { + atmp[j] += alpha * (xi*ytmp[j] + v*yi) + } + offset += n - i + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + i*incX + jy := ky + i*incY + atmp := ap[offset:] + xi := x[ix] + yi := y[iy] + for j := 0; j < n-i; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + offset += n - i + } + return + } + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset-i:] + xi := x[i] + yi := y[i] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += alpha * (xi*y[j] + v*yi) + } + offset += i + 2 + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + jy := ky + atmp := ap[offset-i:] + for j := 0; j <= i; j++ { + atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy]) + jx += incX + jy += incY + } + ix += incX + iy += incY + offset += i + 2 + } +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level3cmplx128.go b/vendor/gonum.org/v1/gonum/blas/gonum/level3cmplx128.go new file mode 100644 index 0000000000..bfff8c5579 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level3cmplx128.go @@ -0,0 +1,1751 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math/cmplx" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/c128" +) + +var _ blas.Complex128Level3 = Implementation{} + +// Zgemm performs one of the matrix-matrix operations +// +// C = alpha * op(A) * op(B) + beta * C +// +// where op(X) is one of +// +// op(X) = X or op(X) = Xᵀ or op(X) = Xᴴ, +// +// alpha and beta are scalars, and A, B and C are matrices, with op(A) an m×k matrix, +// op(B) a k×n matrix and C an m×n matrix. +func (Implementation) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { + switch tA { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch tB { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + } + rowA, colA := m, k + if tA != blas.NoTrans { + rowA, colA = k, m + } + if lda < max(1, colA) { + panic(badLdA) + } + rowB, colB := k, n + if tB != blas.NoTrans { + rowB, colB = n, k + } + if ldb < max(1, colB) { + panic(badLdB) + } + if ldc < max(1, n) { + panic(badLdC) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (rowA-1)*lda+colA { + panic(shortA) + } + if len(b) < (rowB-1)*ldb+colB { + panic(shortB) + } + if len(c) < (m-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. + if (alpha == 0 || k == 0) && beta == 1 { + return + } + + if alpha == 0 { + if beta == 0 { + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + c[i*ldc+j] = 0 + } + } + } else { + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + c[i*ldc+j] *= beta + } + } + } + return + } + + switch tA { + case blas.NoTrans: + switch tB { + case blas.NoTrans: + // Form C = alpha * A * B + beta * C. 
+ for i := 0; i < m; i++ { + switch { + case beta == 0: + for j := 0; j < n; j++ { + c[i*ldc+j] = 0 + } + case beta != 1: + for j := 0; j < n; j++ { + c[i*ldc+j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[i*lda+l] + for j := 0; j < n; j++ { + c[i*ldc+j] += tmp * b[l*ldb+j] + } + } + } + case blas.Trans: + // Form C = alpha * A * Bᵀ + beta * C. + for i := 0; i < m; i++ { + switch { + case beta == 0: + for j := 0; j < n; j++ { + c[i*ldc+j] = 0 + } + case beta != 1: + for j := 0; j < n; j++ { + c[i*ldc+j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[i*lda+l] + for j := 0; j < n; j++ { + c[i*ldc+j] += tmp * b[j*ldb+l] + } + } + } + case blas.ConjTrans: + // Form C = alpha * A * Bᴴ + beta * C. + for i := 0; i < m; i++ { + switch { + case beta == 0: + for j := 0; j < n; j++ { + c[i*ldc+j] = 0 + } + case beta != 1: + for j := 0; j < n; j++ { + c[i*ldc+j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[i*lda+l] + for j := 0; j < n; j++ { + c[i*ldc+j] += tmp * cmplx.Conj(b[j*ldb+l]) + } + } + } + } + case blas.Trans: + switch tB { + case blas.NoTrans: + // Form C = alpha * Aᵀ * B + beta * C. + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex128 + for l := 0; l < k; l++ { + tmp += a[l*lda+i] * b[l*ldb+j] + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + case blas.Trans: + // Form C = alpha * Aᵀ * Bᵀ + beta * C. + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex128 + for l := 0; l < k; l++ { + tmp += a[l*lda+i] * b[j*ldb+l] + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + case blas.ConjTrans: + // Form C = alpha * Aᵀ * Bᴴ + beta * C. + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex128 + for l := 0; l < k; l++ { + tmp += a[l*lda+i] * cmplx.Conj(b[j*ldb+l]) + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + } + case blas.ConjTrans: + switch tB { + case blas.NoTrans: + // Form C = alpha * Aᴴ * B + beta * C. + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex128 + for l := 0; l < k; l++ { + tmp += cmplx.Conj(a[l*lda+i]) * b[l*ldb+j] + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + case blas.Trans: + // Form C = alpha * Aᴴ * Bᵀ + beta * C. + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex128 + for l := 0; l < k; l++ { + tmp += cmplx.Conj(a[l*lda+i]) * b[j*ldb+l] + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + case blas.ConjTrans: + // Form C = alpha * Aᴴ * Bᴴ + beta * C. + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex128 + for l := 0; l < k; l++ { + tmp += cmplx.Conj(a[l*lda+i]) * cmplx.Conj(b[j*ldb+l]) + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + } + } +} + +// Zhemm performs one of the matrix-matrix operations +// +// C = alpha*A*B + beta*C if side == blas.Left +// C = alpha*B*A + beta*C if side == blas.Right +// +// where alpha and beta are scalars, A is an m×m or n×n hermitian matrix and B +// and C are m×n matrices. The imaginary parts of the diagonal elements of A are +// assumed to be zero. 
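+//
+// A minimal usage sketch (an editor's illustration with hypothetical values,
+// not part of the upstream gonum sources):
+//
+//	impl := Implementation{}
+//	// Hermitian A = [{2, i}, {-i, 3}]; only the upper triangle is
+//	// referenced, so the element below the diagonal is left zero.
+//	a := []complex128{2, 1i, 0, 3}
+//	b := []complex128{1, 1} // 2×1 matrix, ldb = 1
+//	c := make([]complex128, 2)
+//	impl.Zhemm(blas.Left, blas.Upper, 2, 1, 1, a, 2, b, 1, 0, c, 1)
+//	// c is now {2 + 1i, 3 - 1i} = A * B.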
+func (Implementation) Zhemm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { + na := m + if side == blas.Right { + na = n + } + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, na): + panic(badLdA) + case ldb < max(1, n): + panic(badLdB) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(na-1)+na { + panic(shortA) + } + if len(b) < ldb*(m-1)+n { + panic(shortB) + } + if len(c) < ldc*(m-1)+n { + panic(shortC) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + if alpha == 0 { + if beta == 0 { + for i := 0; i < m; i++ { + ci := c[i*ldc : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < m; i++ { + ci := c[i*ldc : i*ldc+n] + c128.ScalUnitary(beta, ci) + } + } + return + } + + if side == blas.Left { + // Form C = alpha*A*B + beta*C. + for i := 0; i < m; i++ { + atmp := alpha * complex(real(a[i*lda+i]), 0) + bi := b[i*ldb : i*ldb+n] + ci := c[i*ldc : i*ldc+n] + if beta == 0 { + for j, bij := range bi { + ci[j] = atmp * bij + } + } else { + for j, bij := range bi { + ci[j] = atmp*bij + beta*ci[j] + } + } + if uplo == blas.Upper { + for k := 0; k < i; k++ { + atmp = alpha * cmplx.Conj(a[k*lda+i]) + c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + for k := i + 1; k < m; k++ { + atmp = alpha * a[i*lda+k] + c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + } else { + for k := 0; k < i; k++ { + atmp = alpha * a[i*lda+k] + c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + for k := i + 1; k < m; k++ { + atmp = alpha * cmplx.Conj(a[k*lda+i]) + c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + } + } + } else { + // Form C = alpha*B*A + beta*C. + if uplo == blas.Upper { + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + abij := alpha * b[i*ldb+j] + aj := a[j*lda+j+1 : j*lda+n] + bi := b[i*ldb+j+1 : i*ldb+n] + ci := c[i*ldc+j+1 : i*ldc+n] + var tmp complex128 + for k, ajk := range aj { + ci[k] += abij * ajk + tmp += bi[k] * cmplx.Conj(ajk) + } + ajj := complex(real(a[j*lda+j]), 0) + if beta == 0 { + c[i*ldc+j] = abij*ajj + alpha*tmp + } else { + c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] + } + } + } + } else { + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + abij := alpha * b[i*ldb+j] + aj := a[j*lda : j*lda+j] + bi := b[i*ldb : i*ldb+j] + ci := c[i*ldc : i*ldc+j] + var tmp complex128 + for k, ajk := range aj { + ci[k] += abij * ajk + tmp += bi[k] * cmplx.Conj(ajk) + } + ajj := complex(real(a[j*lda+j]), 0) + if beta == 0 { + c[i*ldc+j] = abij*ajj + alpha*tmp + } else { + c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] + } + } + } + } + } +} + +// Zherk performs one of the hermitian rank-k operations +// +// C = alpha*A*Aᴴ + beta*C if trans == blas.NoTrans +// C = alpha*Aᴴ*A + beta*C if trans == blas.ConjTrans +// +// where alpha and beta are real scalars, C is an n×n hermitian matrix and A is +// an n×k matrix in the first case and a k×n matrix in the second case. +// +// The imaginary parts of the diagonal elements of C are assumed to be zero, and +// on return they will be set to zero. 
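+//
+// A minimal usage sketch (arbitrary values, n = 2, k = 1, A a 2×1 matrix):
+//
+//	var impl Implementation
+//	a := []complex128{1 + 1i, 2 - 1i}
+//	c := make([]complex128, 4)
+//	impl.Zherk(blas.Upper, blas.NoTrans, 2, 1, 1, a, 1, 0, c, 2)
+//	// The upper triangle of c now holds A*Aᴴ:
+//	// c[0] = |1+1i|² = 2, c[1] = (1+1i)*conj(2-1i) = 1+3i, c[3] = |2-1i|² = 5.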
+func (Implementation) Zherk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) { + var rowA, colA int + switch trans { + default: + panic(badTranspose) + case blas.NoTrans: + rowA, colA = n, k + case blas.ConjTrans: + rowA, colA = k, n + } + switch { + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case lda < max(1, colA): + panic(badLdA) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (rowA-1)*lda+colA { + panic(shortA) + } + if len(c) < (n-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. + if (alpha == 0 || k == 0) && beta == 1 { + return + } + + if alpha == 0 { + if uplo == blas.Upper { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + ci[0] = complex(beta*real(ci[0]), 0) + if i != n-1 { + c128.DscalUnitary(beta, ci[1:]) + } + } + } + } else { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + if i != 0 { + c128.DscalUnitary(beta, ci[:i]) + } + ci[i] = complex(beta*real(ci[i]), 0) + } + } + } + return + } + + calpha := complex(alpha, 0) + if trans == blas.NoTrans { + // Form C = alpha*A*Aᴴ + beta*C. + cbeta := complex(beta, 0) + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + ai := a[i*lda : i*lda+k] + switch { + case beta == 0: + // Handle the i-th diagonal element of C. + ci[0] = complex(alpha*real(c128.DotcUnitary(ai, ai)), 0) + // Handle the remaining elements on the i-th row of C. + for jc := range ci[1:] { + j := i + 1 + jc + ci[jc+1] = calpha * c128.DotcUnitary(a[j*lda:j*lda+k], ai) + } + case beta != 1: + cii := calpha*c128.DotcUnitary(ai, ai) + cbeta*ci[0] + ci[0] = complex(real(cii), 0) + for jc, cij := range ci[1:] { + j := i + 1 + jc + ci[jc+1] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij + } + default: + cii := calpha*c128.DotcUnitary(ai, ai) + ci[0] + ci[0] = complex(real(cii), 0) + for jc, cij := range ci[1:] { + j := i + 1 + jc + ci[jc+1] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cij + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + ai := a[i*lda : i*lda+k] + switch { + case beta == 0: + // Handle the first i-1 elements on the i-th row of C. + for j := range ci[:i] { + ci[j] = calpha * c128.DotcUnitary(a[j*lda:j*lda+k], ai) + } + // Handle the i-th diagonal element of C. + ci[i] = complex(alpha*real(c128.DotcUnitary(ai, ai)), 0) + case beta != 1: + for j, cij := range ci[:i] { + ci[j] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij + } + cii := calpha*c128.DotcUnitary(ai, ai) + cbeta*ci[i] + ci[i] = complex(real(cii), 0) + default: + for j, cij := range ci[:i] { + ci[j] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cij + } + cii := calpha*c128.DotcUnitary(ai, ai) + ci[i] + ci[i] = complex(real(cii), 0) + } + } + } + } else { + // Form C = alpha*Aᴴ*A + beta*C. 
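+ // The update is accumulated row-by-row of A: after C is scaled by
+ // beta, each conjugated entry a[j][i] folds the rest of row j into
+ // the stored triangle of C via axpy.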
+ if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + switch { + case beta == 0: + for jc := range ci { + ci[jc] = 0 + } + case beta != 1: + c128.DscalUnitary(beta, ci) + ci[0] = complex(real(ci[0]), 0) + default: + ci[0] = complex(real(ci[0]), 0) + } + for j := 0; j < k; j++ { + aji := cmplx.Conj(a[j*lda+i]) + if aji != 0 { + c128.AxpyUnitary(calpha*aji, a[j*lda+i:j*lda+n], ci) + } + } + c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + switch { + case beta == 0: + for j := range ci { + ci[j] = 0 + } + case beta != 1: + c128.DscalUnitary(beta, ci) + ci[i] = complex(real(ci[i]), 0) + default: + ci[i] = complex(real(ci[i]), 0) + } + for j := 0; j < k; j++ { + aji := cmplx.Conj(a[j*lda+i]) + if aji != 0 { + c128.AxpyUnitary(calpha*aji, a[j*lda:j*lda+i+1], ci) + } + } + c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) + } + } + } +} + +// Zher2k performs one of the hermitian rank-2k operations +// +// C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C if trans == blas.NoTrans +// C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C if trans == blas.ConjTrans +// +// where alpha and beta are scalars with beta real, C is an n×n hermitian matrix +// and A and B are n×k matrices in the first case and k×n matrices in the second case. +// +// The imaginary parts of the diagonal elements of C are assumed to be zero, and +// on return they will be set to zero. +func (Implementation) Zher2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) { + var row, col int + switch trans { + default: + panic(badTranspose) + case blas.NoTrans: + row, col = n, k + case blas.ConjTrans: + row, col = k, n + } + switch { + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case lda < max(1, col): + panic(badLdA) + case ldb < max(1, col): + panic(badLdB) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (row-1)*lda+col { + panic(shortA) + } + if len(b) < (row-1)*ldb+col { + panic(shortB) + } + if len(c) < (n-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. + if (alpha == 0 || k == 0) && beta == 1 { + return + } + + if alpha == 0 { + if uplo == blas.Upper { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + ci[0] = complex(beta*real(ci[0]), 0) + if i != n-1 { + c128.DscalUnitary(beta, ci[1:]) + } + } + } + } else { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + if i != 0 { + c128.DscalUnitary(beta, ci[:i]) + } + ci[i] = complex(beta*real(ci[i]), 0) + } + } + } + return + } + + conjalpha := cmplx.Conj(alpha) + cbeta := complex(beta, 0) + if trans == blas.NoTrans { + // Form C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C. 
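+ // Each stored c[i][j] is built from two conjugated dot products
+ // (c128.DotcUnitary conjugates its first argument):
+ // alpha*dotc(b_j, a_i) + conj(alpha)*dotc(a_j, b_i),
+ // and the diagonal is forced back to a purely real value.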
+ if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i+1 : i*ldc+n] + ai := a[i*lda : i*lda+k] + bi := b[i*ldb : i*ldb+k] + if beta == 0 { + cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) + c[i*ldc+i] = complex(real(cii), 0) + for jc := range ci { + j := i + 1 + jc + ci[jc] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) + } + } else { + cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] + c[i*ldc+i] = complex(real(cii), 0) + for jc, cij := range ci { + j := i + 1 + jc + ci[jc] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i] + ai := a[i*lda : i*lda+k] + bi := b[i*ldb : i*ldb+k] + if beta == 0 { + for j := range ci { + ci[j] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) + } + cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) + c[i*ldc+i] = complex(real(cii), 0) + } else { + for j, cij := range ci { + ci[j] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij + } + cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] + c[i*ldc+i] = complex(real(cii), 0) + } + } + } + } else { + // Form C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C. + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + switch { + case beta == 0: + for jc := range ci { + ci[jc] = 0 + } + case beta != 1: + c128.DscalUnitary(beta, ci) + ci[0] = complex(real(ci[0]), 0) + default: + ci[0] = complex(real(ci[0]), 0) + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + bji := b[j*ldb+i] + if aji != 0 { + c128.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb+i:j*ldb+n], ci) + } + if bji != 0 { + c128.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda+i:j*lda+n], ci) + } + } + ci[0] = complex(real(ci[0]), 0) + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + switch { + case beta == 0: + for j := range ci { + ci[j] = 0 + } + case beta != 1: + c128.DscalUnitary(beta, ci) + ci[i] = complex(real(ci[i]), 0) + default: + ci[i] = complex(real(ci[i]), 0) + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + bji := b[j*ldb+i] + if aji != 0 { + c128.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb:j*ldb+i+1], ci) + } + if bji != 0 { + c128.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda:j*lda+i+1], ci) + } + } + ci[i] = complex(real(ci[i]), 0) + } + } + } +} + +// Zsymm performs one of the matrix-matrix operations +// +// C = alpha*A*B + beta*C if side == blas.Left +// C = alpha*B*A + beta*C if side == blas.Right +// +// where alpha and beta are scalars, A is an m×m or n×n symmetric matrix and B +// and C are m×n matrices. +func (Implementation) Zsymm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { + na := m + if side == blas.Right { + na = n + } + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, na): + panic(badLdA) + case ldb < max(1, n): + panic(badLdB) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. 
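+ // With m == 0 or n == 0 the matrix C is empty and no slice is touched.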
+ if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(na-1)+na { + panic(shortA) + } + if len(b) < ldb*(m-1)+n { + panic(shortB) + } + if len(c) < ldc*(m-1)+n { + panic(shortC) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + if alpha == 0 { + if beta == 0 { + for i := 0; i < m; i++ { + ci := c[i*ldc : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < m; i++ { + ci := c[i*ldc : i*ldc+n] + c128.ScalUnitary(beta, ci) + } + } + return + } + + if side == blas.Left { + // Form C = alpha*A*B + beta*C. + for i := 0; i < m; i++ { + atmp := alpha * a[i*lda+i] + bi := b[i*ldb : i*ldb+n] + ci := c[i*ldc : i*ldc+n] + if beta == 0 { + for j, bij := range bi { + ci[j] = atmp * bij + } + } else { + for j, bij := range bi { + ci[j] = atmp*bij + beta*ci[j] + } + } + if uplo == blas.Upper { + for k := 0; k < i; k++ { + atmp = alpha * a[k*lda+i] + c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + for k := i + 1; k < m; k++ { + atmp = alpha * a[i*lda+k] + c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + } else { + for k := 0; k < i; k++ { + atmp = alpha * a[i*lda+k] + c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + for k := i + 1; k < m; k++ { + atmp = alpha * a[k*lda+i] + c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + } + } + } else { + // Form C = alpha*B*A + beta*C. + if uplo == blas.Upper { + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + abij := alpha * b[i*ldb+j] + aj := a[j*lda+j+1 : j*lda+n] + bi := b[i*ldb+j+1 : i*ldb+n] + ci := c[i*ldc+j+1 : i*ldc+n] + var tmp complex128 + for k, ajk := range aj { + ci[k] += abij * ajk + tmp += bi[k] * ajk + } + if beta == 0 { + c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + } else { + c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] + } + } + } + } else { + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + abij := alpha * b[i*ldb+j] + aj := a[j*lda : j*lda+j] + bi := b[i*ldb : i*ldb+j] + ci := c[i*ldc : i*ldc+j] + var tmp complex128 + for k, ajk := range aj { + ci[k] += abij * ajk + tmp += bi[k] * ajk + } + if beta == 0 { + c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + } else { + c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] + } + } + } + } + } +} + +// Zsyrk performs one of the symmetric rank-k operations +// +// C = alpha*A*Aᵀ + beta*C if trans == blas.NoTrans +// C = alpha*Aᵀ*A + beta*C if trans == blas.Trans +// +// where alpha and beta are scalars, C is an n×n symmetric matrix and A is +// an n×k matrix in the first case and a k×n matrix in the second case. +func (Implementation) Zsyrk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) { + var rowA, colA int + switch trans { + default: + panic(badTranspose) + case blas.NoTrans: + rowA, colA = n, k + case blas.Trans: + rowA, colA = k, n + } + switch { + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case lda < max(1, colA): + panic(badLdA) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (rowA-1)*lda+colA { + panic(shortA) + } + if len(c) < (n-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. 
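+ // When the product term vanishes (alpha == 0 or k == 0) and beta == 1,
+ // C already holds the final result.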
+ if (alpha == 0 || k == 0) && beta == 1 { + return + } + + if alpha == 0 { + if uplo == blas.Upper { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + c128.ScalUnitary(beta, ci) + } + } + } else { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + c128.ScalUnitary(beta, ci) + } + } + } + return + } + + if trans == blas.NoTrans { + // Form C = alpha*A*Aᵀ + beta*C. + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + ai := a[i*lda : i*lda+k] + if beta == 0 { + for jc := range ci { + j := i + jc + ci[jc] = alpha * c128.DotuUnitary(ai, a[j*lda:j*lda+k]) + } + } else { + for jc, cij := range ci { + j := i + jc + ci[jc] = beta*cij + alpha*c128.DotuUnitary(ai, a[j*lda:j*lda+k]) + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + ai := a[i*lda : i*lda+k] + if beta == 0 { + for j := range ci { + ci[j] = alpha * c128.DotuUnitary(ai, a[j*lda:j*lda+k]) + } + } else { + for j, cij := range ci { + ci[j] = beta*cij + alpha*c128.DotuUnitary(ai, a[j*lda:j*lda+k]) + } + } + } + } + } else { + // Form C = alpha*Aᵀ*A + beta*C. + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + switch { + case beta == 0: + for jc := range ci { + ci[jc] = 0 + } + case beta != 1: + for jc := range ci { + ci[jc] *= beta + } + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + if aji != 0 { + c128.AxpyUnitary(alpha*aji, a[j*lda+i:j*lda+n], ci) + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + switch { + case beta == 0: + for j := range ci { + ci[j] = 0 + } + case beta != 1: + for j := range ci { + ci[j] *= beta + } + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + if aji != 0 { + c128.AxpyUnitary(alpha*aji, a[j*lda:j*lda+i+1], ci) + } + } + } + } + } +} + +// Zsyr2k performs one of the symmetric rank-2k operations +// +// C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C if trans == blas.NoTrans +// C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C if trans == blas.Trans +// +// where alpha and beta are scalars, C is an n×n symmetric matrix and A and B +// are n×k matrices in the first case and k×n matrices in the second case. +func (Implementation) Zsyr2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { + var row, col int + switch trans { + default: + panic(badTranspose) + case blas.NoTrans: + row, col = n, k + case blas.Trans: + row, col = k, n + } + switch { + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case lda < max(1, col): + panic(badLdA) + case ldb < max(1, col): + panic(badLdB) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (row-1)*lda+col { + panic(shortA) + } + if len(b) < (row-1)*ldb+col { + panic(shortB) + } + if len(c) < (n-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. 
+ if (alpha == 0 || k == 0) && beta == 1 { + return + } + + if alpha == 0 { + if uplo == blas.Upper { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + c128.ScalUnitary(beta, ci) + } + } + } else { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + c128.ScalUnitary(beta, ci) + } + } + } + return + } + + if trans == blas.NoTrans { + // Form C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C. + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + ai := a[i*lda : i*lda+k] + bi := b[i*ldb : i*ldb+k] + if beta == 0 { + for jc := range ci { + j := i + jc + ci[jc] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) + } + } else { + for jc, cij := range ci { + j := i + jc + ci[jc] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + ai := a[i*lda : i*lda+k] + bi := b[i*ldb : i*ldb+k] + if beta == 0 { + for j := range ci { + ci[j] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) + } + } else { + for j, cij := range ci { + ci[j] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij + } + } + } + } + } else { + // Form C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C. + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + switch { + case beta == 0: + for jc := range ci { + ci[jc] = 0 + } + case beta != 1: + for jc := range ci { + ci[jc] *= beta + } + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + bji := b[j*ldb+i] + if aji != 0 { + c128.AxpyUnitary(alpha*aji, b[j*ldb+i:j*ldb+n], ci) + } + if bji != 0 { + c128.AxpyUnitary(alpha*bji, a[j*lda+i:j*lda+n], ci) + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + switch { + case beta == 0: + for j := range ci { + ci[j] = 0 + } + case beta != 1: + for j := range ci { + ci[j] *= beta + } + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + bji := b[j*ldb+i] + if aji != 0 { + c128.AxpyUnitary(alpha*aji, b[j*ldb:j*ldb+i+1], ci) + } + if bji != 0 { + c128.AxpyUnitary(alpha*bji, a[j*lda:j*lda+i+1], ci) + } + } + } + } + } +} + +// Ztrmm performs one of the matrix-matrix operations +// +// B = alpha * op(A) * B if side == blas.Left, +// B = alpha * B * op(A) if side == blas.Right, +// +// where alpha is a scalar, B is an m×n matrix, A is a unit, or non-unit, +// upper or lower triangular matrix and op(A) is one of +// +// op(A) = A if trans == blas.NoTrans, +// op(A) = Aᵀ if trans == blas.Trans, +// op(A) = Aᴴ if trans == blas.ConjTrans. 
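+//
+// A minimal usage sketch (arbitrary values; A is 2×2 upper triangular,
+// its strictly lower entry is never read):
+//
+//	var impl Implementation
+//	a := []complex128{1, 2, 0, 3}
+//	b := []complex128{1, 1, 1, 1}
+//	impl.Ztrmm(blas.Left, blas.Upper, blas.NoTrans, blas.NonUnit, 2, 2, 1, a, 2, b, 2)
+//	// b now holds A*B = {3, 3, 3, 3}.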
+func (Implementation) Ztrmm(side blas.Side, uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) { + na := m + if side == blas.Right { + na = n + } + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: + panic(badTranspose) + case diag != blas.Unit && diag != blas.NonUnit: + panic(badDiag) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, na): + panic(badLdA) + case ldb < max(1, n): + panic(badLdB) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (na-1)*lda+na { + panic(shortA) + } + if len(b) < (m-1)*ldb+n { + panic(shortB) + } + + // Quick return if possible. + if alpha == 0 { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for j := range bi { + bi[j] = 0 + } + } + return + } + + noConj := trans != blas.ConjTrans + noUnit := diag == blas.NonUnit + if side == blas.Left { + if trans == blas.NoTrans { + // Form B = alpha*A*B. + if uplo == blas.Upper { + for i := 0; i < m; i++ { + aii := alpha + if noUnit { + aii *= a[i*lda+i] + } + bi := b[i*ldb : i*ldb+n] + for j := range bi { + bi[j] *= aii + } + for ja, aij := range a[i*lda+i+1 : i*lda+m] { + j := ja + i + 1 + if aij != 0 { + c128.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) + } + } + } + } else { + for i := m - 1; i >= 0; i-- { + aii := alpha + if noUnit { + aii *= a[i*lda+i] + } + bi := b[i*ldb : i*ldb+n] + for j := range bi { + bi[j] *= aii + } + for j, aij := range a[i*lda : i*lda+i] { + if aij != 0 { + c128.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) + } + } + } + } + } else { + // Form B = alpha*Aᵀ*B or B = alpha*Aᴴ*B. + if uplo == blas.Upper { + for k := m - 1; k >= 0; k-- { + bk := b[k*ldb : k*ldb+n] + for ja, ajk := range a[k*lda+k+1 : k*lda+m] { + if ajk == 0 { + continue + } + j := k + 1 + ja + if noConj { + c128.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) + } else { + c128.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) + } + } + akk := alpha + if noUnit { + if noConj { + akk *= a[k*lda+k] + } else { + akk *= cmplx.Conj(a[k*lda+k]) + } + } + if akk != 1 { + c128.ScalUnitary(akk, bk) + } + } + } else { + for k := 0; k < m; k++ { + bk := b[k*ldb : k*ldb+n] + for j, ajk := range a[k*lda : k*lda+k] { + if ajk == 0 { + continue + } + if noConj { + c128.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) + } else { + c128.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) + } + } + akk := alpha + if noUnit { + if noConj { + akk *= a[k*lda+k] + } else { + akk *= cmplx.Conj(a[k*lda+k]) + } + } + if akk != 1 { + c128.ScalUnitary(akk, bk) + } + } + } + } + } else { + if trans == blas.NoTrans { + // Form B = alpha*B*A. + if uplo == blas.Upper { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for k := n - 1; k >= 0; k-- { + abik := alpha * bi[k] + if abik == 0 { + continue + } + bi[k] = abik + if noUnit { + bi[k] *= a[k*lda+k] + } + c128.AxpyUnitary(abik, a[k*lda+k+1:k*lda+n], bi[k+1:]) + } + } + } else { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for k := 0; k < n; k++ { + abik := alpha * bi[k] + if abik == 0 { + continue + } + bi[k] = abik + if noUnit { + bi[k] *= a[k*lda+k] + } + c128.AxpyUnitary(abik, a[k*lda:k*lda+k], bi[:k]) + } + } + } + } else { + // Form B = alpha*B*Aᵀ or B = alpha*B*Aᴴ. 
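+ // Each row of B is updated in place: b[i][j] becomes a dot product of
+ // row j of A with the not-yet-overwritten part of the same row of B,
+ // so the upper-triangular case walks j forward and the lower case backward.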
+ if uplo == blas.Upper { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for j, bij := range bi { + if noConj { + if noUnit { + bij *= a[j*lda+j] + } + bij += c128.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) + } else { + if noUnit { + bij *= cmplx.Conj(a[j*lda+j]) + } + bij += c128.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) + } + bi[j] = alpha * bij + } + } + } else { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for j := n - 1; j >= 0; j-- { + bij := bi[j] + if noConj { + if noUnit { + bij *= a[j*lda+j] + } + bij += c128.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) + } else { + if noUnit { + bij *= cmplx.Conj(a[j*lda+j]) + } + bij += c128.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) + } + bi[j] = alpha * bij + } + } + } + } + } +} + +// Ztrsm solves one of the matrix equations +// +// op(A) * X = alpha * B if side == blas.Left, +// X * op(A) = alpha * B if side == blas.Right, +// +// where alpha is a scalar, X and B are m×n matrices, A is a unit or +// non-unit, upper or lower triangular matrix and op(A) is one of +// +// op(A) = A if transA == blas.NoTrans, +// op(A) = Aᵀ if transA == blas.Trans, +// op(A) = Aᴴ if transA == blas.ConjTrans. +// +// On return the matrix X is overwritten on B. +func (Implementation) Ztrsm(side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) { + na := m + if side == blas.Right { + na = n + } + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case transA != blas.NoTrans && transA != blas.Trans && transA != blas.ConjTrans: + panic(badTranspose) + case diag != blas.Unit && diag != blas.NonUnit: + panic(badDiag) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, na): + panic(badLdA) + case ldb < max(1, n): + panic(badLdB) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (na-1)*lda+na { + panic(shortA) + } + if len(b) < (m-1)*ldb+n { + panic(shortB) + } + + if alpha == 0 { + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + b[i*ldb+j] = 0 + } + } + return + } + + noConj := transA != blas.ConjTrans + noUnit := diag == blas.NonUnit + if side == blas.Left { + if transA == blas.NoTrans { + // Form B = alpha*inv(A)*B. + if uplo == blas.Upper { + for i := m - 1; i >= 0; i-- { + bi := b[i*ldb : i*ldb+n] + if alpha != 1 { + c128.ScalUnitary(alpha, bi) + } + for ka, aik := range a[i*lda+i+1 : i*lda+m] { + k := i + 1 + ka + if aik != 0 { + c128.AxpyUnitary(-aik, b[k*ldb:k*ldb+n], bi) + } + } + if noUnit { + c128.ScalUnitary(1/a[i*lda+i], bi) + } + } + } else { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + if alpha != 1 { + c128.ScalUnitary(alpha, bi) + } + for j, aij := range a[i*lda : i*lda+i] { + if aij != 0 { + c128.AxpyUnitary(-aij, b[j*ldb:j*ldb+n], bi) + } + } + if noUnit { + c128.ScalUnitary(1/a[i*lda+i], bi) + } + } + } + } else { + // Form B = alpha*inv(Aᵀ)*B or B = alpha*inv(Aᴴ)*B. 
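+ // This is substitution on the transposed system: each finished row bi
+ // is divided by its (possibly conjugated) diagonal entry and then
+ // eliminated from the rows that still reference it, before the final
+ // scaling by alpha.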
+ if uplo == blas.Upper { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + if noUnit { + if noConj { + c128.ScalUnitary(1/a[i*lda+i], bi) + } else { + c128.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) + } + } + for ja, aij := range a[i*lda+i+1 : i*lda+m] { + if aij == 0 { + continue + } + j := i + 1 + ja + if noConj { + c128.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) + } else { + c128.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) + } + } + if alpha != 1 { + c128.ScalUnitary(alpha, bi) + } + } + } else { + for i := m - 1; i >= 0; i-- { + bi := b[i*ldb : i*ldb+n] + if noUnit { + if noConj { + c128.ScalUnitary(1/a[i*lda+i], bi) + } else { + c128.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) + } + } + for j, aij := range a[i*lda : i*lda+i] { + if aij == 0 { + continue + } + if noConj { + c128.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) + } else { + c128.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) + } + } + if alpha != 1 { + c128.ScalUnitary(alpha, bi) + } + } + } + } + } else { + if transA == blas.NoTrans { + // Form B = alpha*B*inv(A). + if uplo == blas.Upper { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + if alpha != 1 { + c128.ScalUnitary(alpha, bi) + } + for j, bij := range bi { + if bij == 0 { + continue + } + if noUnit { + bi[j] /= a[j*lda+j] + } + c128.AxpyUnitary(-bi[j], a[j*lda+j+1:j*lda+n], bi[j+1:n]) + } + } + } else { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + if alpha != 1 { + c128.ScalUnitary(alpha, bi) + } + for j := n - 1; j >= 0; j-- { + if bi[j] == 0 { + continue + } + if noUnit { + bi[j] /= a[j*lda+j] + } + c128.AxpyUnitary(-bi[j], a[j*lda:j*lda+j], bi[:j]) + } + } + } + } else { + // Form B = alpha*B*inv(Aᵀ) or B = alpha*B*inv(Aᴴ). + if uplo == blas.Upper { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for j := n - 1; j >= 0; j-- { + bij := alpha * bi[j] + if noConj { + bij -= c128.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) + if noUnit { + bij /= a[j*lda+j] + } + } else { + bij -= c128.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) + if noUnit { + bij /= cmplx.Conj(a[j*lda+j]) + } + } + bi[j] = bij + } + } + } else { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for j, bij := range bi { + bij *= alpha + if noConj { + bij -= c128.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) + if noUnit { + bij /= a[j*lda+j] + } + } else { + bij -= c128.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) + if noUnit { + bij /= cmplx.Conj(a[j*lda+j]) + } + } + bi[j] = bij + } + } + } + } + } +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level3cmplx64.go b/vendor/gonum.org/v1/gonum/blas/gonum/level3cmplx64.go new file mode 100644 index 0000000000..b7fb5a2c4e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level3cmplx64.go @@ -0,0 +1,1771 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. + +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + cmplx "gonum.org/v1/gonum/internal/cmplx64" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/c64" +) + +var _ blas.Complex64Level3 = Implementation{} + +// Cgemm performs one of the matrix-matrix operations +// +// C = alpha * op(A) * op(B) + beta * C +// +// where op(X) is one of +// +// op(X) = X or op(X) = Xᵀ or op(X) = Xᴴ, +// +// alpha and beta are scalars, and A, B and C are matrices, with op(A) an m×k matrix, +// op(B) a k×n matrix and C an m×n matrix. 
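+//
+// All matrices are stored in row-major order; lda, ldb and ldc are the
+// strides between consecutive rows.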
+// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { + switch tA { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch tB { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + } + rowA, colA := m, k + if tA != blas.NoTrans { + rowA, colA = k, m + } + if lda < max(1, colA) { + panic(badLdA) + } + rowB, colB := k, n + if tB != blas.NoTrans { + rowB, colB = n, k + } + if ldb < max(1, colB) { + panic(badLdB) + } + if ldc < max(1, n) { + panic(badLdC) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (rowA-1)*lda+colA { + panic(shortA) + } + if len(b) < (rowB-1)*ldb+colB { + panic(shortB) + } + if len(c) < (m-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. + if (alpha == 0 || k == 0) && beta == 1 { + return + } + + if alpha == 0 { + if beta == 0 { + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + c[i*ldc+j] = 0 + } + } + } else { + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + c[i*ldc+j] *= beta + } + } + } + return + } + + switch tA { + case blas.NoTrans: + switch tB { + case blas.NoTrans: + // Form C = alpha * A * B + beta * C. + for i := 0; i < m; i++ { + switch { + case beta == 0: + for j := 0; j < n; j++ { + c[i*ldc+j] = 0 + } + case beta != 1: + for j := 0; j < n; j++ { + c[i*ldc+j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[i*lda+l] + for j := 0; j < n; j++ { + c[i*ldc+j] += tmp * b[l*ldb+j] + } + } + } + case blas.Trans: + // Form C = alpha * A * Bᵀ + beta * C. + for i := 0; i < m; i++ { + switch { + case beta == 0: + for j := 0; j < n; j++ { + c[i*ldc+j] = 0 + } + case beta != 1: + for j := 0; j < n; j++ { + c[i*ldc+j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[i*lda+l] + for j := 0; j < n; j++ { + c[i*ldc+j] += tmp * b[j*ldb+l] + } + } + } + case blas.ConjTrans: + // Form C = alpha * A * Bᴴ + beta * C. + for i := 0; i < m; i++ { + switch { + case beta == 0: + for j := 0; j < n; j++ { + c[i*ldc+j] = 0 + } + case beta != 1: + for j := 0; j < n; j++ { + c[i*ldc+j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[i*lda+l] + for j := 0; j < n; j++ { + c[i*ldc+j] += tmp * cmplx.Conj(b[j*ldb+l]) + } + } + } + } + case blas.Trans: + switch tB { + case blas.NoTrans: + // Form C = alpha * Aᵀ * B + beta * C. + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex64 + for l := 0; l < k; l++ { + tmp += a[l*lda+i] * b[l*ldb+j] + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + case blas.Trans: + // Form C = alpha * Aᵀ * Bᵀ + beta * C. + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex64 + for l := 0; l < k; l++ { + tmp += a[l*lda+i] * b[j*ldb+l] + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + case blas.ConjTrans: + // Form C = alpha * Aᵀ * Bᴴ + beta * C. 
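+ // tmp accumulates the k-term sum with B's entries conjugated; the
+ // beta == 0 branch writes C outright so stale values are never read.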
+ for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex64 + for l := 0; l < k; l++ { + tmp += a[l*lda+i] * cmplx.Conj(b[j*ldb+l]) + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + } + case blas.ConjTrans: + switch tB { + case blas.NoTrans: + // Form C = alpha * Aᴴ * B + beta * C. + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex64 + for l := 0; l < k; l++ { + tmp += cmplx.Conj(a[l*lda+i]) * b[l*ldb+j] + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + case blas.Trans: + // Form C = alpha * Aᴴ * Bᵀ + beta * C. + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex64 + for l := 0; l < k; l++ { + tmp += cmplx.Conj(a[l*lda+i]) * b[j*ldb+l] + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + case blas.ConjTrans: + // Form C = alpha * Aᴴ * Bᴴ + beta * C. + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + var tmp complex64 + for l := 0; l < k; l++ { + tmp += cmplx.Conj(a[l*lda+i]) * cmplx.Conj(b[j*ldb+l]) + } + if beta == 0 { + c[i*ldc+j] = alpha * tmp + } else { + c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] + } + } + } + } + } +} + +// Chemm performs one of the matrix-matrix operations +// +// C = alpha*A*B + beta*C if side == blas.Left +// C = alpha*B*A + beta*C if side == blas.Right +// +// where alpha and beta are scalars, A is an m×m or n×n hermitian matrix and B +// and C are m×n matrices. The imaginary parts of the diagonal elements of A are +// assumed to be zero. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Chemm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { + na := m + if side == blas.Right { + na = n + } + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, na): + panic(badLdA) + case ldb < max(1, n): + panic(badLdB) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(na-1)+na { + panic(shortA) + } + if len(b) < ldb*(m-1)+n { + panic(shortB) + } + if len(c) < ldc*(m-1)+n { + panic(shortC) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + if alpha == 0 { + if beta == 0 { + for i := 0; i < m; i++ { + ci := c[i*ldc : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < m; i++ { + ci := c[i*ldc : i*ldc+n] + c64.ScalUnitary(beta, ci) + } + } + return + } + + if side == blas.Left { + // Form C = alpha*A*B + beta*C. 
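+ // Only the real part of A's diagonal scales row i of B; the
+ // off-diagonal terms come from the stored triangle and from its
+ // conjugate transpose, folded in with axpy.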
+ for i := 0; i < m; i++ { + atmp := alpha * complex(real(a[i*lda+i]), 0) + bi := b[i*ldb : i*ldb+n] + ci := c[i*ldc : i*ldc+n] + if beta == 0 { + for j, bij := range bi { + ci[j] = atmp * bij + } + } else { + for j, bij := range bi { + ci[j] = atmp*bij + beta*ci[j] + } + } + if uplo == blas.Upper { + for k := 0; k < i; k++ { + atmp = alpha * cmplx.Conj(a[k*lda+i]) + c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + for k := i + 1; k < m; k++ { + atmp = alpha * a[i*lda+k] + c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + } else { + for k := 0; k < i; k++ { + atmp = alpha * a[i*lda+k] + c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + for k := i + 1; k < m; k++ { + atmp = alpha * cmplx.Conj(a[k*lda+i]) + c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + } + } + } else { + // Form C = alpha*B*A + beta*C. + if uplo == blas.Upper { + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + abij := alpha * b[i*ldb+j] + aj := a[j*lda+j+1 : j*lda+n] + bi := b[i*ldb+j+1 : i*ldb+n] + ci := c[i*ldc+j+1 : i*ldc+n] + var tmp complex64 + for k, ajk := range aj { + ci[k] += abij * ajk + tmp += bi[k] * cmplx.Conj(ajk) + } + ajj := complex(real(a[j*lda+j]), 0) + if beta == 0 { + c[i*ldc+j] = abij*ajj + alpha*tmp + } else { + c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] + } + } + } + } else { + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + abij := alpha * b[i*ldb+j] + aj := a[j*lda : j*lda+j] + bi := b[i*ldb : i*ldb+j] + ci := c[i*ldc : i*ldc+j] + var tmp complex64 + for k, ajk := range aj { + ci[k] += abij * ajk + tmp += bi[k] * cmplx.Conj(ajk) + } + ajj := complex(real(a[j*lda+j]), 0) + if beta == 0 { + c[i*ldc+j] = abij*ajj + alpha*tmp + } else { + c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] + } + } + } + } + } +} + +// Cherk performs one of the hermitian rank-k operations +// +// C = alpha*A*Aᴴ + beta*C if trans == blas.NoTrans +// C = alpha*Aᴴ*A + beta*C if trans == blas.ConjTrans +// +// where alpha and beta are real scalars, C is an n×n hermitian matrix and A is +// an n×k matrix in the first case and a k×n matrix in the second case. +// +// The imaginary parts of the diagonal elements of C are assumed to be zero, and +// on return they will be set to zero. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Cherk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) { + var rowA, colA int + switch trans { + default: + panic(badTranspose) + case blas.NoTrans: + rowA, colA = n, k + case blas.ConjTrans: + rowA, colA = k, n + } + switch { + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case lda < max(1, colA): + panic(badLdA) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (rowA-1)*lda+colA { + panic(shortA) + } + if len(c) < (n-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. 
+ if (alpha == 0 || k == 0) && beta == 1 { + return + } + + if alpha == 0 { + if uplo == blas.Upper { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + ci[0] = complex(beta*real(ci[0]), 0) + if i != n-1 { + c64.SscalUnitary(beta, ci[1:]) + } + } + } + } else { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + if i != 0 { + c64.SscalUnitary(beta, ci[:i]) + } + ci[i] = complex(beta*real(ci[i]), 0) + } + } + } + return + } + + calpha := complex(alpha, 0) + if trans == blas.NoTrans { + // Form C = alpha*A*Aᴴ + beta*C. + cbeta := complex(beta, 0) + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + ai := a[i*lda : i*lda+k] + switch { + case beta == 0: + // Handle the i-th diagonal element of C. + ci[0] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0) + // Handle the remaining elements on the i-th row of C. + for jc := range ci[1:] { + j := i + 1 + jc + ci[jc+1] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai) + } + case beta != 1: + cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[0] + ci[0] = complex(real(cii), 0) + for jc, cij := range ci[1:] { + j := i + 1 + jc + ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij + } + default: + cii := calpha*c64.DotcUnitary(ai, ai) + ci[0] + ci[0] = complex(real(cii), 0) + for jc, cij := range ci[1:] { + j := i + 1 + jc + ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + ai := a[i*lda : i*lda+k] + switch { + case beta == 0: + // Handle the first i-1 elements on the i-th row of C. + for j := range ci[:i] { + ci[j] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai) + } + // Handle the i-th diagonal element of C. + ci[i] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0) + case beta != 1: + for j, cij := range ci[:i] { + ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij + } + cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[i] + ci[i] = complex(real(cii), 0) + default: + for j, cij := range ci[:i] { + ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij + } + cii := calpha*c64.DotcUnitary(ai, ai) + ci[i] + ci[i] = complex(real(cii), 0) + } + } + } + } else { + // Form C = alpha*Aᴴ*A + beta*C. 
+ if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + switch { + case beta == 0: + for jc := range ci { + ci[jc] = 0 + } + case beta != 1: + c64.SscalUnitary(beta, ci) + ci[0] = complex(real(ci[0]), 0) + default: + ci[0] = complex(real(ci[0]), 0) + } + for j := 0; j < k; j++ { + aji := cmplx.Conj(a[j*lda+i]) + if aji != 0 { + c64.AxpyUnitary(calpha*aji, a[j*lda+i:j*lda+n], ci) + } + } + c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + switch { + case beta == 0: + for j := range ci { + ci[j] = 0 + } + case beta != 1: + c64.SscalUnitary(beta, ci) + ci[i] = complex(real(ci[i]), 0) + default: + ci[i] = complex(real(ci[i]), 0) + } + for j := 0; j < k; j++ { + aji := cmplx.Conj(a[j*lda+i]) + if aji != 0 { + c64.AxpyUnitary(calpha*aji, a[j*lda:j*lda+i+1], ci) + } + } + c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) + } + } + } +} + +// Cher2k performs one of the hermitian rank-2k operations +// +// C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C if trans == blas.NoTrans +// C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C if trans == blas.ConjTrans +// +// where alpha and beta are scalars with beta real, C is an n×n hermitian matrix +// and A and B are n×k matrices in the first case and k×n matrices in the second case. +// +// The imaginary parts of the diagonal elements of C are assumed to be zero, and +// on return they will be set to zero. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Cher2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) { + var row, col int + switch trans { + default: + panic(badTranspose) + case blas.NoTrans: + row, col = n, k + case blas.ConjTrans: + row, col = k, n + } + switch { + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case lda < max(1, col): + panic(badLdA) + case ldb < max(1, col): + panic(badLdB) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (row-1)*lda+col { + panic(shortA) + } + if len(b) < (row-1)*ldb+col { + panic(shortB) + } + if len(c) < (n-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. + if (alpha == 0 || k == 0) && beta == 1 { + return + } + + if alpha == 0 { + if uplo == blas.Upper { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + ci[0] = complex(beta*real(ci[0]), 0) + if i != n-1 { + c64.SscalUnitary(beta, ci[1:]) + } + } + } + } else { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + if i != 0 { + c64.SscalUnitary(beta, ci[:i]) + } + ci[i] = complex(beta*real(ci[i]), 0) + } + } + } + return + } + + conjalpha := cmplx.Conj(alpha) + cbeta := complex(beta, 0) + if trans == blas.NoTrans { + // Form C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C. 
+ if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i+1 : i*ldc+n] + ai := a[i*lda : i*lda+k] + bi := b[i*ldb : i*ldb+k] + if beta == 0 { + cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + c[i*ldc+i] = complex(real(cii), 0) + for jc := range ci { + j := i + 1 + jc + ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + } + } else { + cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] + c[i*ldc+i] = complex(real(cii), 0) + for jc, cij := range ci { + j := i + 1 + jc + ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i] + ai := a[i*lda : i*lda+k] + bi := b[i*ldb : i*ldb+k] + if beta == 0 { + for j := range ci { + ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + } + cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + c[i*ldc+i] = complex(real(cii), 0) + } else { + for j, cij := range ci { + ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij + } + cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] + c[i*ldc+i] = complex(real(cii), 0) + } + } + } + } else { + // Form C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C. + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + switch { + case beta == 0: + for jc := range ci { + ci[jc] = 0 + } + case beta != 1: + c64.SscalUnitary(beta, ci) + ci[0] = complex(real(ci[0]), 0) + default: + ci[0] = complex(real(ci[0]), 0) + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + bji := b[j*ldb+i] + if aji != 0 { + c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb+i:j*ldb+n], ci) + } + if bji != 0 { + c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda+i:j*lda+n], ci) + } + } + ci[0] = complex(real(ci[0]), 0) + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + switch { + case beta == 0: + for j := range ci { + ci[j] = 0 + } + case beta != 1: + c64.SscalUnitary(beta, ci) + ci[i] = complex(real(ci[i]), 0) + default: + ci[i] = complex(real(ci[i]), 0) + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + bji := b[j*ldb+i] + if aji != 0 { + c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb:j*ldb+i+1], ci) + } + if bji != 0 { + c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda:j*lda+i+1], ci) + } + } + ci[i] = complex(real(ci[i]), 0) + } + } + } +} + +// Csymm performs one of the matrix-matrix operations +// +// C = alpha*A*B + beta*C if side == blas.Left +// C = alpha*B*A + beta*C if side == blas.Right +// +// where alpha and beta are scalars, A is an m×m or n×n symmetric matrix and B +// and C are m×n matrices. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Csymm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { + na := m + if side == blas.Right { + na = n + } + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, na): + panic(badLdA) + case ldb < max(1, n): + panic(badLdB) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. 
+ if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(na-1)+na { + panic(shortA) + } + if len(b) < ldb*(m-1)+n { + panic(shortB) + } + if len(c) < ldc*(m-1)+n { + panic(shortC) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + if alpha == 0 { + if beta == 0 { + for i := 0; i < m; i++ { + ci := c[i*ldc : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < m; i++ { + ci := c[i*ldc : i*ldc+n] + c64.ScalUnitary(beta, ci) + } + } + return + } + + if side == blas.Left { + // Form C = alpha*A*B + beta*C. + for i := 0; i < m; i++ { + atmp := alpha * a[i*lda+i] + bi := b[i*ldb : i*ldb+n] + ci := c[i*ldc : i*ldc+n] + if beta == 0 { + for j, bij := range bi { + ci[j] = atmp * bij + } + } else { + for j, bij := range bi { + ci[j] = atmp*bij + beta*ci[j] + } + } + if uplo == blas.Upper { + for k := 0; k < i; k++ { + atmp = alpha * a[k*lda+i] + c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + for k := i + 1; k < m; k++ { + atmp = alpha * a[i*lda+k] + c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + } else { + for k := 0; k < i; k++ { + atmp = alpha * a[i*lda+k] + c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + for k := i + 1; k < m; k++ { + atmp = alpha * a[k*lda+i] + c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) + } + } + } + } else { + // Form C = alpha*B*A + beta*C. + if uplo == blas.Upper { + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + abij := alpha * b[i*ldb+j] + aj := a[j*lda+j+1 : j*lda+n] + bi := b[i*ldb+j+1 : i*ldb+n] + ci := c[i*ldc+j+1 : i*ldc+n] + var tmp complex64 + for k, ajk := range aj { + ci[k] += abij * ajk + tmp += bi[k] * ajk + } + if beta == 0 { + c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + } else { + c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] + } + } + } + } else { + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + abij := alpha * b[i*ldb+j] + aj := a[j*lda : j*lda+j] + bi := b[i*ldb : i*ldb+j] + ci := c[i*ldc : i*ldc+j] + var tmp complex64 + for k, ajk := range aj { + ci[k] += abij * ajk + tmp += bi[k] * ajk + } + if beta == 0 { + c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + } else { + c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] + } + } + } + } + } +} + +// Csyrk performs one of the symmetric rank-k operations +// +// C = alpha*A*Aᵀ + beta*C if trans == blas.NoTrans +// C = alpha*Aᵀ*A + beta*C if trans == blas.Trans +// +// where alpha and beta are scalars, C is an n×n symmetric matrix and A is +// an n×k matrix in the first case and a k×n matrix in the second case. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Csyrk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) { + var rowA, colA int + switch trans { + default: + panic(badTranspose) + case blas.NoTrans: + rowA, colA = n, k + case blas.Trans: + rowA, colA = k, n + } + switch { + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case lda < max(1, colA): + panic(badLdA) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (rowA-1)*lda+colA { + panic(shortA) + } + if len(c) < (n-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. 
+ if (alpha == 0 || k == 0) && beta == 1 { + return + } + + if alpha == 0 { + if uplo == blas.Upper { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + c64.ScalUnitary(beta, ci) + } + } + } else { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + c64.ScalUnitary(beta, ci) + } + } + } + return + } + + if trans == blas.NoTrans { + // Form C = alpha*A*Aᵀ + beta*C. + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + ai := a[i*lda : i*lda+k] + if beta == 0 { + for jc := range ci { + j := i + jc + ci[jc] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k]) + } + } else { + for jc, cij := range ci { + j := i + jc + ci[jc] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k]) + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + ai := a[i*lda : i*lda+k] + if beta == 0 { + for j := range ci { + ci[j] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k]) + } + } else { + for j, cij := range ci { + ci[j] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k]) + } + } + } + } + } else { + // Form C = alpha*Aᵀ*A + beta*C. + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + switch { + case beta == 0: + for jc := range ci { + ci[jc] = 0 + } + case beta != 1: + for jc := range ci { + ci[jc] *= beta + } + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + if aji != 0 { + c64.AxpyUnitary(alpha*aji, a[j*lda+i:j*lda+n], ci) + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + switch { + case beta == 0: + for j := range ci { + ci[j] = 0 + } + case beta != 1: + for j := range ci { + ci[j] *= beta + } + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + if aji != 0 { + c64.AxpyUnitary(alpha*aji, a[j*lda:j*lda+i+1], ci) + } + } + } + } + } +} + +// Csyr2k performs one of the symmetric rank-2k operations +// +// C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C if trans == blas.NoTrans +// C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C if trans == blas.Trans +// +// where alpha and beta are scalars, C is an n×n symmetric matrix and A and B +// are n×k matrices in the first case and k×n matrices in the second case. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Csyr2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { + var row, col int + switch trans { + default: + panic(badTranspose) + case blas.NoTrans: + row, col = n, k + case blas.Trans: + row, col = k, n + } + switch { + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case lda < max(1, col): + panic(badLdA) + case ldb < max(1, col): + panic(badLdB) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (row-1)*lda+col { + panic(shortA) + } + if len(b) < (row-1)*ldb+col { + panic(shortB) + } + if len(c) < (n-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. 
+ if (alpha == 0 || k == 0) && beta == 1 { + return + } + + if alpha == 0 { + if uplo == blas.Upper { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + c64.ScalUnitary(beta, ci) + } + } + } else { + if beta == 0 { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + c64.ScalUnitary(beta, ci) + } + } + } + return + } + + if trans == blas.NoTrans { + // Form C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C. + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + ai := a[i*lda : i*lda+k] + bi := b[i*ldb : i*ldb+k] + if beta == 0 { + for jc := range ci { + j := i + jc + ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + } + } else { + for jc, cij := range ci { + j := i + jc + ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + ai := a[i*lda : i*lda+k] + bi := b[i*ldb : i*ldb+k] + if beta == 0 { + for j := range ci { + ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + } + } else { + for j, cij := range ci { + ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij + } + } + } + } + } else { + // Form C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C. + if uplo == blas.Upper { + for i := 0; i < n; i++ { + ci := c[i*ldc+i : i*ldc+n] + switch { + case beta == 0: + for jc := range ci { + ci[jc] = 0 + } + case beta != 1: + for jc := range ci { + ci[jc] *= beta + } + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + bji := b[j*ldb+i] + if aji != 0 { + c64.AxpyUnitary(alpha*aji, b[j*ldb+i:j*ldb+n], ci) + } + if bji != 0 { + c64.AxpyUnitary(alpha*bji, a[j*lda+i:j*lda+n], ci) + } + } + } + } else { + for i := 0; i < n; i++ { + ci := c[i*ldc : i*ldc+i+1] + switch { + case beta == 0: + for j := range ci { + ci[j] = 0 + } + case beta != 1: + for j := range ci { + ci[j] *= beta + } + } + for j := 0; j < k; j++ { + aji := a[j*lda+i] + bji := b[j*ldb+i] + if aji != 0 { + c64.AxpyUnitary(alpha*aji, b[j*ldb:j*ldb+i+1], ci) + } + if bji != 0 { + c64.AxpyUnitary(alpha*bji, a[j*lda:j*lda+i+1], ci) + } + } + } + } + } +} + +// Ctrmm performs one of the matrix-matrix operations +// +// B = alpha * op(A) * B if side == blas.Left, +// B = alpha * B * op(A) if side == blas.Right, +// +// where alpha is a scalar, B is an m×n matrix, A is a unit, or non-unit, +// upper or lower triangular matrix and op(A) is one of +// +// op(A) = A if trans == blas.NoTrans, +// op(A) = Aᵀ if trans == blas.Trans, +// op(A) = Aᴴ if trans == blas.ConjTrans. +// +// Complex64 implementations are autogenerated and not directly tested. 
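+//
+// With side == blas.Left and trans == blas.NoTrans, for example, this computes
+// B = alpha*A*B in place, one row of B at a time.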
+func (Implementation) Ctrmm(side blas.Side, uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) { + na := m + if side == blas.Right { + na = n + } + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: + panic(badTranspose) + case diag != blas.Unit && diag != blas.NonUnit: + panic(badDiag) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, na): + panic(badLdA) + case ldb < max(1, n): + panic(badLdB) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (na-1)*lda+na { + panic(shortA) + } + if len(b) < (m-1)*ldb+n { + panic(shortB) + } + + // Quick return if possible. + if alpha == 0 { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for j := range bi { + bi[j] = 0 + } + } + return + } + + noConj := trans != blas.ConjTrans + noUnit := diag == blas.NonUnit + if side == blas.Left { + if trans == blas.NoTrans { + // Form B = alpha*A*B. + if uplo == blas.Upper { + for i := 0; i < m; i++ { + aii := alpha + if noUnit { + aii *= a[i*lda+i] + } + bi := b[i*ldb : i*ldb+n] + for j := range bi { + bi[j] *= aii + } + for ja, aij := range a[i*lda+i+1 : i*lda+m] { + j := ja + i + 1 + if aij != 0 { + c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) + } + } + } + } else { + for i := m - 1; i >= 0; i-- { + aii := alpha + if noUnit { + aii *= a[i*lda+i] + } + bi := b[i*ldb : i*ldb+n] + for j := range bi { + bi[j] *= aii + } + for j, aij := range a[i*lda : i*lda+i] { + if aij != 0 { + c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) + } + } + } + } + } else { + // Form B = alpha*Aᵀ*B or B = alpha*Aᴴ*B. + if uplo == blas.Upper { + for k := m - 1; k >= 0; k-- { + bk := b[k*ldb : k*ldb+n] + for ja, ajk := range a[k*lda+k+1 : k*lda+m] { + if ajk == 0 { + continue + } + j := k + 1 + ja + if noConj { + c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) + } else { + c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) + } + } + akk := alpha + if noUnit { + if noConj { + akk *= a[k*lda+k] + } else { + akk *= cmplx.Conj(a[k*lda+k]) + } + } + if akk != 1 { + c64.ScalUnitary(akk, bk) + } + } + } else { + for k := 0; k < m; k++ { + bk := b[k*ldb : k*ldb+n] + for j, ajk := range a[k*lda : k*lda+k] { + if ajk == 0 { + continue + } + if noConj { + c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) + } else { + c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) + } + } + akk := alpha + if noUnit { + if noConj { + akk *= a[k*lda+k] + } else { + akk *= cmplx.Conj(a[k*lda+k]) + } + } + if akk != 1 { + c64.ScalUnitary(akk, bk) + } + } + } + } + } else { + if trans == blas.NoTrans { + // Form B = alpha*B*A. + if uplo == blas.Upper { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for k := n - 1; k >= 0; k-- { + abik := alpha * bi[k] + if abik == 0 { + continue + } + bi[k] = abik + if noUnit { + bi[k] *= a[k*lda+k] + } + c64.AxpyUnitary(abik, a[k*lda+k+1:k*lda+n], bi[k+1:]) + } + } + } else { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for k := 0; k < n; k++ { + abik := alpha * bi[k] + if abik == 0 { + continue + } + bi[k] = abik + if noUnit { + bi[k] *= a[k*lda+k] + } + c64.AxpyUnitary(abik, a[k*lda:k*lda+k], bi[:k]) + } + } + } + } else { + // Form B = alpha*B*Aᵀ or B = alpha*B*Aᴴ. 
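+			// Each b[i][j] becomes alpha times the dot product of the stored part of
+			// row j of A with row i of B, conjugating A when trans == blas.ConjTrans.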
+ if uplo == blas.Upper { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for j, bij := range bi { + if noConj { + if noUnit { + bij *= a[j*lda+j] + } + bij += c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) + } else { + if noUnit { + bij *= cmplx.Conj(a[j*lda+j]) + } + bij += c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) + } + bi[j] = alpha * bij + } + } + } else { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for j := n - 1; j >= 0; j-- { + bij := bi[j] + if noConj { + if noUnit { + bij *= a[j*lda+j] + } + bij += c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) + } else { + if noUnit { + bij *= cmplx.Conj(a[j*lda+j]) + } + bij += c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) + } + bi[j] = alpha * bij + } + } + } + } + } +} + +// Ctrsm solves one of the matrix equations +// +// op(A) * X = alpha * B if side == blas.Left, +// X * op(A) = alpha * B if side == blas.Right, +// +// where alpha is a scalar, X and B are m×n matrices, A is a unit or +// non-unit, upper or lower triangular matrix and op(A) is one of +// +// op(A) = A if transA == blas.NoTrans, +// op(A) = Aᵀ if transA == blas.Trans, +// op(A) = Aᴴ if transA == blas.ConjTrans. +// +// On return the matrix X is overwritten on B. +// +// Complex64 implementations are autogenerated and not directly tested. +func (Implementation) Ctrsm(side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) { + na := m + if side == blas.Right { + na = n + } + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case uplo != blas.Lower && uplo != blas.Upper: + panic(badUplo) + case transA != blas.NoTrans && transA != blas.Trans && transA != blas.ConjTrans: + panic(badTranspose) + case diag != blas.Unit && diag != blas.NonUnit: + panic(badDiag) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, na): + panic(badLdA) + case ldb < max(1, n): + panic(badLdB) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < (na-1)*lda+na { + panic(shortA) + } + if len(b) < (m-1)*ldb+n { + panic(shortB) + } + + if alpha == 0 { + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + b[i*ldb+j] = 0 + } + } + return + } + + noConj := transA != blas.ConjTrans + noUnit := diag == blas.NonUnit + if side == blas.Left { + if transA == blas.NoTrans { + // Form B = alpha*inv(A)*B. + if uplo == blas.Upper { + for i := m - 1; i >= 0; i-- { + bi := b[i*ldb : i*ldb+n] + if alpha != 1 { + c64.ScalUnitary(alpha, bi) + } + for ka, aik := range a[i*lda+i+1 : i*lda+m] { + k := i + 1 + ka + if aik != 0 { + c64.AxpyUnitary(-aik, b[k*ldb:k*ldb+n], bi) + } + } + if noUnit { + c64.ScalUnitary(1/a[i*lda+i], bi) + } + } + } else { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + if alpha != 1 { + c64.ScalUnitary(alpha, bi) + } + for j, aij := range a[i*lda : i*lda+i] { + if aij != 0 { + c64.AxpyUnitary(-aij, b[j*ldb:j*ldb+n], bi) + } + } + if noUnit { + c64.ScalUnitary(1/a[i*lda+i], bi) + } + } + } + } else { + // Form B = alpha*inv(Aᵀ)*B or B = alpha*inv(Aᴴ)*B. 
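+			// The substitution runs top to bottom when uplo == blas.Upper (op(A) is
+			// then lower triangular) and bottom to top when uplo == blas.Lower.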
+ if uplo == blas.Upper { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + if noUnit { + if noConj { + c64.ScalUnitary(1/a[i*lda+i], bi) + } else { + c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) + } + } + for ja, aij := range a[i*lda+i+1 : i*lda+m] { + if aij == 0 { + continue + } + j := i + 1 + ja + if noConj { + c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) + } else { + c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) + } + } + if alpha != 1 { + c64.ScalUnitary(alpha, bi) + } + } + } else { + for i := m - 1; i >= 0; i-- { + bi := b[i*ldb : i*ldb+n] + if noUnit { + if noConj { + c64.ScalUnitary(1/a[i*lda+i], bi) + } else { + c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) + } + } + for j, aij := range a[i*lda : i*lda+i] { + if aij == 0 { + continue + } + if noConj { + c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) + } else { + c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) + } + } + if alpha != 1 { + c64.ScalUnitary(alpha, bi) + } + } + } + } + } else { + if transA == blas.NoTrans { + // Form B = alpha*B*inv(A). + if uplo == blas.Upper { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + if alpha != 1 { + c64.ScalUnitary(alpha, bi) + } + for j, bij := range bi { + if bij == 0 { + continue + } + if noUnit { + bi[j] /= a[j*lda+j] + } + c64.AxpyUnitary(-bi[j], a[j*lda+j+1:j*lda+n], bi[j+1:n]) + } + } + } else { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + if alpha != 1 { + c64.ScalUnitary(alpha, bi) + } + for j := n - 1; j >= 0; j-- { + if bi[j] == 0 { + continue + } + if noUnit { + bi[j] /= a[j*lda+j] + } + c64.AxpyUnitary(-bi[j], a[j*lda:j*lda+j], bi[:j]) + } + } + } + } else { + // Form B = alpha*B*inv(Aᵀ) or B = alpha*B*inv(Aᴴ). + if uplo == blas.Upper { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for j := n - 1; j >= 0; j-- { + bij := alpha * bi[j] + if noConj { + bij -= c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) + if noUnit { + bij /= a[j*lda+j] + } + } else { + bij -= c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) + if noUnit { + bij /= cmplx.Conj(a[j*lda+j]) + } + } + bi[j] = bij + } + } + } else { + for i := 0; i < m; i++ { + bi := b[i*ldb : i*ldb+n] + for j, bij := range bi { + bij *= alpha + if noConj { + bij -= c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) + if noUnit { + bij /= a[j*lda+j] + } + } else { + bij -= c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) + if noUnit { + bij /= cmplx.Conj(a[j*lda+j]) + } + } + bi[j] = bij + } + } + } + } + } +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level3float32.go b/vendor/gonum.org/v1/gonum/blas/gonum/level3float32.go new file mode 100644 index 0000000000..4b813fbc05 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level3float32.go @@ -0,0 +1,925 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. + +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/f32" +) + +var _ blas.Float32Level3 = Implementation{} + +// Strsm solves one of the matrix equations +// +// A * X = alpha * B if tA == blas.NoTrans and side == blas.Left +// Aᵀ * X = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left +// X * A = alpha * B if tA == blas.NoTrans and side == blas.Right +// X * Aᵀ = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Right +// +// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a +// scalar. +// +// At entry to the function, X contains the values of B, and the result is +// stored in-place into X. +// +// No check is made that A is invertible. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) { + if s != blas.Left && s != blas.Right { + panic(badSide) + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + k := n + if s == blas.Left { + k = m + } + if lda < max(1, k) { + panic(badLdA) + } + if ldb < max(1, n) { + panic(badLdB) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(k-1)+k { + panic(shortA) + } + if len(b) < ldb*(m-1)+n { + panic(shortB) + } + + if alpha == 0 { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := range btmp { + btmp[j] = 0 + } + } + return + } + nonUnit := d == blas.NonUnit + if s == blas.Left { + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := m - 1; i >= 0; i-- { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + f32.ScalUnitary(alpha, btmp) + } + for ka, va := range a[i*lda+i+1 : i*lda+m] { + if va != 0 { + k := ka + i + 1 + f32.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp) + } + } + if nonUnit { + tmp := 1 / a[i*lda+i] + f32.ScalUnitary(tmp, btmp) + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + f32.ScalUnitary(alpha, btmp) + } + for k, va := range a[i*lda : i*lda+i] { + if va != 0 { + f32.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp) + } + } + if nonUnit { + tmp := 1 / a[i*lda+i] + f32.ScalUnitary(tmp, btmp) + } + } + return + } + // Cases where a is transposed + if ul == blas.Upper { + for k := 0; k < m; k++ { + btmpk := b[k*ldb : k*ldb+n] + if nonUnit { + tmp := 1 / a[k*lda+k] + f32.ScalUnitary(tmp, btmpk) + } + for ia, va := range a[k*lda+k+1 : k*lda+m] { + if va != 0 { + i := ia + k + 1 + f32.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n]) + } + } + if alpha != 1 { + f32.ScalUnitary(alpha, btmpk) + } + } + return + } + for k := m - 1; k >= 0; k-- { + btmpk := b[k*ldb : k*ldb+n] + if nonUnit { + tmp := 1 / a[k*lda+k] + f32.ScalUnitary(tmp, btmpk) + } + for i, va := range a[k*lda : k*lda+k] { + if va != 0 { + f32.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n]) + } + } + if alpha != 1 { + f32.ScalUnitary(alpha, btmpk) + } + } + return + } + // Cases where a is to the right of X. 
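+	// With A on the right, each row of B is an independent triangular solve and
+	// is processed in place.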
+ if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + f32.ScalUnitary(alpha, btmp) + } + for k, vb := range btmp { + if vb == 0 { + continue + } + if nonUnit { + btmp[k] /= a[k*lda+k] + } + f32.AxpyUnitary(-btmp[k], a[k*lda+k+1:k*lda+n], btmp[k+1:n]) + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + f32.ScalUnitary(alpha, btmp) + } + for k := n - 1; k >= 0; k-- { + if btmp[k] == 0 { + continue + } + if nonUnit { + btmp[k] /= a[k*lda+k] + } + f32.AxpyUnitary(-btmp[k], a[k*lda:k*lda+k], btmp[:k]) + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := n - 1; j >= 0; j-- { + tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:]) + if nonUnit { + tmp /= a[j*lda+j] + } + btmp[j] = tmp + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := 0; j < n; j++ { + tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j]) + if nonUnit { + tmp /= a[j*lda+j] + } + btmp[j] = tmp + } + } +} + +// Ssymm performs one of the matrix-matrix operations +// +// C = alpha * A * B + beta * C if side == blas.Left +// C = alpha * B * A + beta * C if side == blas.Right +// +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha +// is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { + if s != blas.Right && s != blas.Left { + panic(badSide) + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + k := n + if s == blas.Left { + k = m + } + if lda < max(1, k) { + panic(badLdA) + } + if ldb < max(1, n) { + panic(badLdB) + } + if ldc < max(1, n) { + panic(badLdC) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(k-1)+k { + panic(shortA) + } + if len(b) < ldb*(m-1)+n { + panic(shortB) + } + if len(c) < ldc*(m-1)+n { + panic(shortC) + } + + // Quick return if possible. 
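+	// Nothing to do when alpha == 0 and beta == 1.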
+ if alpha == 0 && beta == 1 { + return + } + + if beta == 0 { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + } + + if alpha == 0 { + if beta != 0 { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := 0; j < n; j++ { + ctmp[j] *= beta + } + } + } + return + } + + isUpper := ul == blas.Upper + if s == blas.Left { + for i := 0; i < m; i++ { + atmp := alpha * a[i*lda+i] + btmp := b[i*ldb : i*ldb+n] + ctmp := c[i*ldc : i*ldc+n] + for j, v := range btmp { + ctmp[j] *= beta + ctmp[j] += atmp * v + } + + for k := 0; k < i; k++ { + var atmp float32 + if isUpper { + atmp = a[k*lda+i] + } else { + atmp = a[i*lda+k] + } + atmp *= alpha + f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp) + } + for k := i + 1; k < m; k++ { + var atmp float32 + if isUpper { + atmp = a[i*lda+k] + } else { + atmp = a[k*lda+i] + } + atmp *= alpha + f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp) + } + } + return + } + if isUpper { + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + tmp := alpha * b[i*ldb+j] + var tmp2 float32 + atmp := a[j*lda+j+1 : j*lda+n] + btmp := b[i*ldb+j+1 : i*ldb+n] + ctmp := c[i*ldc+j+1 : i*ldc+n] + for k, v := range atmp { + ctmp[k] += tmp * v + tmp2 += btmp[k] * v + } + c[i*ldc+j] *= beta + c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 + } + } + return + } + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + tmp := alpha * b[i*ldb+j] + var tmp2 float32 + atmp := a[j*lda : j*lda+j] + btmp := b[i*ldb : i*ldb+j] + ctmp := c[i*ldc : i*ldc+j] + for k, v := range atmp { + ctmp[k] += tmp * v + tmp2 += btmp[k] * v + } + c[i*ldc+j] *= beta + c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 + } + } +} + +// Ssyrk performs one of the symmetric rank-k operations +// +// C = alpha * A * Aᵀ + beta * C if tA == blas.NoTrans +// C = alpha * Aᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans +// +// where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and +// beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Ssyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { + panic(badTranspose) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + row, col := k, n + if tA == blas.NoTrans { + row, col = n, k + } + if lda < max(1, col) { + panic(badLdA) + } + if ldc < max(1, n) { + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
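+	// a is row×col with stride lda, and c is n×n with stride ldc.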
+ if len(a) < lda*(row-1)+col { + panic(shortA) + } + if len(c) < ldc*(n-1)+n { + panic(shortC) + } + + if alpha == 0 { + if beta == 0 { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + atmp := a[i*lda : i*lda+k] + if beta == 0 { + for jc := range ctmp { + j := jc + i + ctmp[jc] = alpha * f32.DotUnitary(atmp, a[j*lda:j*lda+k]) + } + } else { + for jc, vc := range ctmp { + j := jc + i + ctmp[jc] = vc*beta + alpha*f32.DotUnitary(atmp, a[j*lda:j*lda+k]) + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + atmp := a[i*lda : i*lda+k] + if beta == 0 { + for j := range ctmp { + ctmp[j] = alpha * f32.DotUnitary(a[j*lda:j*lda+k], atmp) + } + } else { + for j, vc := range ctmp { + ctmp[j] = vc*beta + alpha*f32.DotUnitary(a[j*lda:j*lda+k], atmp) + } + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + if beta == 0 { + for j := range ctmp { + ctmp[j] = 0 + } + } else if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[l*lda+i] + if tmp != 0 { + f32.AxpyUnitary(tmp, a[l*lda+i:l*lda+n], ctmp) + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[l*lda+i] + if tmp != 0 { + f32.AxpyUnitary(tmp, a[l*lda:l*lda+i+1], ctmp) + } + } + } +} + +// Ssyr2k performs one of the symmetric rank 2k operations +// +// C = alpha * A * Bᵀ + alpha * B * Aᵀ + beta * C if tA == blas.NoTrans +// C = alpha * Aᵀ * B + alpha * Bᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans +// +// where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and +// alpha and beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Ssyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { + panic(badTranspose) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + row, col := k, n + if tA == blas.NoTrans { + row, col = n, k + } + if lda < max(1, col) { + panic(badLdA) + } + if ldb < max(1, col) { + panic(badLdB) + } + if ldc < max(1, n) { + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
+ if len(a) < lda*(row-1)+col { + panic(shortA) + } + if len(b) < ldb*(row-1)+col { + panic(shortB) + } + if len(c) < ldc*(n-1)+n { + panic(shortC) + } + + if alpha == 0 { + if beta == 0 { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + btmp := b[i*ldb : i*ldb+k] + ctmp := c[i*ldc+i : i*ldc+n] + if beta == 0 { + for jc := range ctmp { + j := i + jc + var tmp1, tmp2 float32 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[jc] = alpha * (tmp1 + tmp2) + } + } else { + for jc := range ctmp { + j := i + jc + var tmp1, tmp2 float32 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[jc] *= beta + ctmp[jc] += alpha * (tmp1 + tmp2) + } + } + } + return + } + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + btmp := b[i*ldb : i*ldb+k] + ctmp := c[i*ldc : i*ldc+i+1] + if beta == 0 { + for j := 0; j <= i; j++ { + var tmp1, tmp2 float32 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[j] = alpha * (tmp1 + tmp2) + } + } else { + for j := 0; j <= i; j++ { + var tmp1, tmp2 float32 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[j] *= beta + ctmp[j] += alpha * (tmp1 + tmp2) + } + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + switch beta { + case 0: + for j := range ctmp { + ctmp[j] = 0 + } + case 1: + default: + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp1 := alpha * b[l*ldb+i] + tmp2 := alpha * a[l*lda+i] + btmp := b[l*ldb+i : l*ldb+n] + if tmp1 != 0 || tmp2 != 0 { + for j, v := range a[l*lda+i : l*lda+n] { + ctmp[j] += v*tmp1 + btmp[j]*tmp2 + } + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + switch beta { + case 0: + for j := range ctmp { + ctmp[j] = 0 + } + case 1: + default: + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp1 := alpha * b[l*ldb+i] + tmp2 := alpha * a[l*lda+i] + btmp := b[l*ldb : l*ldb+i+1] + if tmp1 != 0 || tmp2 != 0 { + for j, v := range a[l*lda : l*lda+i+1] { + ctmp[j] += v*tmp1 + btmp[j]*tmp2 + } + } + } + } +} + +// Strmm performs one of the matrix-matrix operations +// +// B = alpha * A * B if tA == blas.NoTrans and side == blas.Left +// B = alpha * Aᵀ * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left +// B = alpha * B * A if tA == blas.NoTrans and side == blas.Right +// B = alpha * B * Aᵀ if tA == blas.Trans or blas.ConjTrans, and side == blas.Right +// +// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. 
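+//
+// B is overwritten by the result; only the triangle of A selected by ul is read.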
+func (Implementation) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) { + if s != blas.Left && s != blas.Right { + panic(badSide) + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + k := n + if s == blas.Left { + k = m + } + if lda < max(1, k) { + panic(badLdA) + } + if ldb < max(1, n) { + panic(badLdB) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(k-1)+k { + panic(shortA) + } + if len(b) < ldb*(m-1)+n { + panic(shortB) + } + + if alpha == 0 { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := range btmp { + btmp[j] = 0 + } + } + return + } + + nonUnit := d == blas.NonUnit + if s == blas.Left { + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < m; i++ { + tmp := alpha + if nonUnit { + tmp *= a[i*lda+i] + } + btmp := b[i*ldb : i*ldb+n] + f32.ScalUnitary(tmp, btmp) + for ka, va := range a[i*lda+i+1 : i*lda+m] { + k := ka + i + 1 + if va != 0 { + f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp) + } + } + } + return + } + for i := m - 1; i >= 0; i-- { + tmp := alpha + if nonUnit { + tmp *= a[i*lda+i] + } + btmp := b[i*ldb : i*ldb+n] + f32.ScalUnitary(tmp, btmp) + for k, va := range a[i*lda : i*lda+i] { + if va != 0 { + f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp) + } + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for k := m - 1; k >= 0; k-- { + btmpk := b[k*ldb : k*ldb+n] + for ia, va := range a[k*lda+k+1 : k*lda+m] { + i := ia + k + 1 + btmp := b[i*ldb : i*ldb+n] + if va != 0 { + f32.AxpyUnitary(alpha*va, btmpk, btmp) + } + } + tmp := alpha + if nonUnit { + tmp *= a[k*lda+k] + } + if tmp != 1 { + f32.ScalUnitary(tmp, btmpk) + } + } + return + } + for k := 0; k < m; k++ { + btmpk := b[k*ldb : k*ldb+n] + for i, va := range a[k*lda : k*lda+k] { + btmp := b[i*ldb : i*ldb+n] + if va != 0 { + f32.AxpyUnitary(alpha*va, btmpk, btmp) + } + } + tmp := alpha + if nonUnit { + tmp *= a[k*lda+k] + } + if tmp != 1 { + f32.ScalUnitary(tmp, btmpk) + } + } + return + } + // Cases where a is on the right + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for k := n - 1; k >= 0; k-- { + tmp := alpha * btmp[k] + if tmp == 0 { + continue + } + btmp[k] = tmp + if nonUnit { + btmp[k] *= a[k*lda+k] + } + f32.AxpyUnitary(tmp, a[k*lda+k+1:k*lda+n], btmp[k+1:n]) + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for k := 0; k < n; k++ { + tmp := alpha * btmp[k] + if tmp == 0 { + continue + } + btmp[k] = tmp + if nonUnit { + btmp[k] *= a[k*lda+k] + } + f32.AxpyUnitary(tmp, a[k*lda:k*lda+k], btmp[:k]) + } + } + return + } + // Cases where a is transposed. 
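+	// Each btmp[j] gathers the dot product of row i of B with the stored part of
+	// row j of A, then is scaled by alpha.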
+ if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j, vb := range btmp { + tmp := vb + if nonUnit { + tmp *= a[j*lda+j] + } + tmp += f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n]) + btmp[j] = alpha * tmp + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := n - 1; j >= 0; j-- { + tmp := btmp[j] + if nonUnit { + tmp *= a[j*lda+j] + } + tmp += f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j]) + btmp[j] = alpha * tmp + } + } +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level3float64.go b/vendor/gonum.org/v1/gonum/blas/gonum/level3float64.go new file mode 100644 index 0000000000..0d203513c1 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level3float64.go @@ -0,0 +1,913 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/f64" +) + +var _ blas.Float64Level3 = Implementation{} + +// Dtrsm solves one of the matrix equations +// +// A * X = alpha * B if tA == blas.NoTrans and side == blas.Left +// Aᵀ * X = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left +// X * A = alpha * B if tA == blas.NoTrans and side == blas.Right +// X * Aᵀ = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Right +// +// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a +// scalar. +// +// At entry to the function, X contains the values of B, and the result is +// stored in-place into X. +// +// No check is made that A is invertible. +func (Implementation) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { + if s != blas.Left && s != blas.Right { + panic(badSide) + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + k := n + if s == blas.Left { + k = m + } + if lda < max(1, k) { + panic(badLdA) + } + if ldb < max(1, n) { + panic(badLdB) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
+ if len(a) < lda*(k-1)+k { + panic(shortA) + } + if len(b) < ldb*(m-1)+n { + panic(shortB) + } + + if alpha == 0 { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := range btmp { + btmp[j] = 0 + } + } + return + } + nonUnit := d == blas.NonUnit + if s == blas.Left { + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := m - 1; i >= 0; i-- { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + f64.ScalUnitary(alpha, btmp) + } + for ka, va := range a[i*lda+i+1 : i*lda+m] { + if va != 0 { + k := ka + i + 1 + f64.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp) + } + } + if nonUnit { + tmp := 1 / a[i*lda+i] + f64.ScalUnitary(tmp, btmp) + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + f64.ScalUnitary(alpha, btmp) + } + for k, va := range a[i*lda : i*lda+i] { + if va != 0 { + f64.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp) + } + } + if nonUnit { + tmp := 1 / a[i*lda+i] + f64.ScalUnitary(tmp, btmp) + } + } + return + } + // Cases where a is transposed + if ul == blas.Upper { + for k := 0; k < m; k++ { + btmpk := b[k*ldb : k*ldb+n] + if nonUnit { + tmp := 1 / a[k*lda+k] + f64.ScalUnitary(tmp, btmpk) + } + for ia, va := range a[k*lda+k+1 : k*lda+m] { + if va != 0 { + i := ia + k + 1 + f64.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n]) + } + } + if alpha != 1 { + f64.ScalUnitary(alpha, btmpk) + } + } + return + } + for k := m - 1; k >= 0; k-- { + btmpk := b[k*ldb : k*ldb+n] + if nonUnit { + tmp := 1 / a[k*lda+k] + f64.ScalUnitary(tmp, btmpk) + } + for i, va := range a[k*lda : k*lda+k] { + if va != 0 { + f64.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n]) + } + } + if alpha != 1 { + f64.ScalUnitary(alpha, btmpk) + } + } + return + } + // Cases where a is to the right of X. + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + f64.ScalUnitary(alpha, btmp) + } + for k, vb := range btmp { + if vb == 0 { + continue + } + if nonUnit { + btmp[k] /= a[k*lda+k] + } + f64.AxpyUnitary(-btmp[k], a[k*lda+k+1:k*lda+n], btmp[k+1:n]) + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + f64.ScalUnitary(alpha, btmp) + } + for k := n - 1; k >= 0; k-- { + if btmp[k] == 0 { + continue + } + if nonUnit { + btmp[k] /= a[k*lda+k] + } + f64.AxpyUnitary(-btmp[k], a[k*lda:k*lda+k], btmp[:k]) + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := n - 1; j >= 0; j-- { + tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:]) + if nonUnit { + tmp /= a[j*lda+j] + } + btmp[j] = tmp + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := 0; j < n; j++ { + tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda:j*lda+j], btmp[:j]) + if nonUnit { + tmp /= a[j*lda+j] + } + btmp[j] = tmp + } + } +} + +// Dsymm performs one of the matrix-matrix operations +// +// C = alpha * A * B + beta * C if side == blas.Left +// C = alpha * B * A + beta * C if side == blas.Right +// +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha +// is a scalar. 
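+//
+// Only the triangle of A selected by ul is referenced.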
+func (Implementation) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { + if s != blas.Right && s != blas.Left { + panic(badSide) + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + k := n + if s == blas.Left { + k = m + } + if lda < max(1, k) { + panic(badLdA) + } + if ldb < max(1, n) { + panic(badLdB) + } + if ldc < max(1, n) { + panic(badLdC) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(k-1)+k { + panic(shortA) + } + if len(b) < ldb*(m-1)+n { + panic(shortB) + } + if len(c) < ldc*(m-1)+n { + panic(shortC) + } + + // Quick return if possible. + if alpha == 0 && beta == 1 { + return + } + + if beta == 0 { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + } + + if alpha == 0 { + if beta != 0 { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := 0; j < n; j++ { + ctmp[j] *= beta + } + } + } + return + } + + isUpper := ul == blas.Upper + if s == blas.Left { + for i := 0; i < m; i++ { + atmp := alpha * a[i*lda+i] + btmp := b[i*ldb : i*ldb+n] + ctmp := c[i*ldc : i*ldc+n] + for j, v := range btmp { + ctmp[j] *= beta + ctmp[j] += atmp * v + } + + for k := 0; k < i; k++ { + var atmp float64 + if isUpper { + atmp = a[k*lda+i] + } else { + atmp = a[i*lda+k] + } + atmp *= alpha + f64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp) + } + for k := i + 1; k < m; k++ { + var atmp float64 + if isUpper { + atmp = a[i*lda+k] + } else { + atmp = a[k*lda+i] + } + atmp *= alpha + f64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp) + } + } + return + } + if isUpper { + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + tmp := alpha * b[i*ldb+j] + var tmp2 float64 + atmp := a[j*lda+j+1 : j*lda+n] + btmp := b[i*ldb+j+1 : i*ldb+n] + ctmp := c[i*ldc+j+1 : i*ldc+n] + for k, v := range atmp { + ctmp[k] += tmp * v + tmp2 += btmp[k] * v + } + c[i*ldc+j] *= beta + c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 + } + } + return + } + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + tmp := alpha * b[i*ldb+j] + var tmp2 float64 + atmp := a[j*lda : j*lda+j] + btmp := b[i*ldb : i*ldb+j] + ctmp := c[i*ldc : i*ldc+j] + for k, v := range atmp { + ctmp[k] += tmp * v + tmp2 += btmp[k] * v + } + c[i*ldc+j] *= beta + c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 + } + } +} + +// Dsyrk performs one of the symmetric rank-k operations +// +// C = alpha * A * Aᵀ + beta * C if tA == blas.NoTrans +// C = alpha * Aᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans +// +// where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and +// beta are scalars. +func (Implementation) Dsyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { + panic(badTranspose) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + row, col := k, n + if tA == blas.NoTrans { + row, col = n, k + } + if lda < max(1, col) { + panic(badLdA) + } + if ldc < max(1, n) { + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
+ if len(a) < lda*(row-1)+col { + panic(shortA) + } + if len(c) < ldc*(n-1)+n { + panic(shortC) + } + + if alpha == 0 { + if beta == 0 { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + atmp := a[i*lda : i*lda+k] + if beta == 0 { + for jc := range ctmp { + j := jc + i + ctmp[jc] = alpha * f64.DotUnitary(atmp, a[j*lda:j*lda+k]) + } + } else { + for jc, vc := range ctmp { + j := jc + i + ctmp[jc] = vc*beta + alpha*f64.DotUnitary(atmp, a[j*lda:j*lda+k]) + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + atmp := a[i*lda : i*lda+k] + if beta == 0 { + for j := range ctmp { + ctmp[j] = alpha * f64.DotUnitary(a[j*lda:j*lda+k], atmp) + } + } else { + for j, vc := range ctmp { + ctmp[j] = vc*beta + alpha*f64.DotUnitary(a[j*lda:j*lda+k], atmp) + } + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + if beta == 0 { + for j := range ctmp { + ctmp[j] = 0 + } + } else if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[l*lda+i] + if tmp != 0 { + f64.AxpyUnitary(tmp, a[l*lda+i:l*lda+n], ctmp) + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[l*lda+i] + if tmp != 0 { + f64.AxpyUnitary(tmp, a[l*lda:l*lda+i+1], ctmp) + } + } + } +} + +// Dsyr2k performs one of the symmetric rank 2k operations +// +// C = alpha * A * Bᵀ + alpha * B * Aᵀ + beta * C if tA == blas.NoTrans +// C = alpha * Aᵀ * B + alpha * Bᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans +// +// where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and +// alpha and beta are scalars. +func (Implementation) Dsyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { + panic(badTranspose) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + row, col := k, n + if tA == blas.NoTrans { + row, col = n, k + } + if lda < max(1, col) { + panic(badLdA) + } + if ldb < max(1, col) { + panic(badLdB) + } + if ldc < max(1, n) { + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. 
+ if len(a) < lda*(row-1)+col { + panic(shortA) + } + if len(b) < ldb*(row-1)+col { + panic(shortB) + } + if len(c) < ldc*(n-1)+n { + panic(shortC) + } + + if alpha == 0 { + if beta == 0 { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + btmp := b[i*ldb : i*ldb+k] + ctmp := c[i*ldc+i : i*ldc+n] + if beta == 0 { + for jc := range ctmp { + j := i + jc + var tmp1, tmp2 float64 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[jc] = alpha * (tmp1 + tmp2) + } + } else { + for jc := range ctmp { + j := i + jc + var tmp1, tmp2 float64 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[jc] *= beta + ctmp[jc] += alpha * (tmp1 + tmp2) + } + } + } + return + } + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + btmp := b[i*ldb : i*ldb+k] + ctmp := c[i*ldc : i*ldc+i+1] + if beta == 0 { + for j := 0; j <= i; j++ { + var tmp1, tmp2 float64 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[j] = alpha * (tmp1 + tmp2) + } + } else { + for j := 0; j <= i; j++ { + var tmp1, tmp2 float64 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[j] *= beta + ctmp[j] += alpha * (tmp1 + tmp2) + } + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + switch beta { + case 0: + for j := range ctmp { + ctmp[j] = 0 + } + case 1: + default: + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp1 := alpha * b[l*ldb+i] + tmp2 := alpha * a[l*lda+i] + btmp := b[l*ldb+i : l*ldb+n] + if tmp1 != 0 || tmp2 != 0 { + for j, v := range a[l*lda+i : l*lda+n] { + ctmp[j] += v*tmp1 + btmp[j]*tmp2 + } + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + switch beta { + case 0: + for j := range ctmp { + ctmp[j] = 0 + } + case 1: + default: + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp1 := alpha * b[l*ldb+i] + tmp2 := alpha * a[l*lda+i] + btmp := b[l*ldb : l*ldb+i+1] + if tmp1 != 0 || tmp2 != 0 { + for j, v := range a[l*lda : l*lda+i+1] { + ctmp[j] += v*tmp1 + btmp[j]*tmp2 + } + } + } + } +} + +// Dtrmm performs one of the matrix-matrix operations +// +// B = alpha * A * B if tA == blas.NoTrans and side == blas.Left +// B = alpha * Aᵀ * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left +// B = alpha * B * A if tA == blas.NoTrans and side == blas.Right +// B = alpha * B * Aᵀ if tA == blas.Trans or blas.ConjTrans, and side == blas.Right +// +// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar. 
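+//
+// The result overwrites B; elements of A outside the triangle selected by ul
+// are never referenced.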
+func (Implementation) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { + if s != blas.Left && s != blas.Right { + panic(badSide) + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + k := n + if s == blas.Left { + k = m + } + if lda < max(1, k) { + panic(badLdA) + } + if ldb < max(1, n) { + panic(badLdB) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if len(a) < lda*(k-1)+k { + panic(shortA) + } + if len(b) < ldb*(m-1)+n { + panic(shortB) + } + + if alpha == 0 { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := range btmp { + btmp[j] = 0 + } + } + return + } + + nonUnit := d == blas.NonUnit + if s == blas.Left { + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < m; i++ { + tmp := alpha + if nonUnit { + tmp *= a[i*lda+i] + } + btmp := b[i*ldb : i*ldb+n] + f64.ScalUnitary(tmp, btmp) + for ka, va := range a[i*lda+i+1 : i*lda+m] { + k := ka + i + 1 + if va != 0 { + f64.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp) + } + } + } + return + } + for i := m - 1; i >= 0; i-- { + tmp := alpha + if nonUnit { + tmp *= a[i*lda+i] + } + btmp := b[i*ldb : i*ldb+n] + f64.ScalUnitary(tmp, btmp) + for k, va := range a[i*lda : i*lda+i] { + if va != 0 { + f64.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp) + } + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for k := m - 1; k >= 0; k-- { + btmpk := b[k*ldb : k*ldb+n] + for ia, va := range a[k*lda+k+1 : k*lda+m] { + i := ia + k + 1 + btmp := b[i*ldb : i*ldb+n] + if va != 0 { + f64.AxpyUnitary(alpha*va, btmpk, btmp) + } + } + tmp := alpha + if nonUnit { + tmp *= a[k*lda+k] + } + if tmp != 1 { + f64.ScalUnitary(tmp, btmpk) + } + } + return + } + for k := 0; k < m; k++ { + btmpk := b[k*ldb : k*ldb+n] + for i, va := range a[k*lda : k*lda+k] { + btmp := b[i*ldb : i*ldb+n] + if va != 0 { + f64.AxpyUnitary(alpha*va, btmpk, btmp) + } + } + tmp := alpha + if nonUnit { + tmp *= a[k*lda+k] + } + if tmp != 1 { + f64.ScalUnitary(tmp, btmpk) + } + } + return + } + // Cases where a is on the right + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for k := n - 1; k >= 0; k-- { + tmp := alpha * btmp[k] + if tmp == 0 { + continue + } + btmp[k] = tmp + if nonUnit { + btmp[k] *= a[k*lda+k] + } + f64.AxpyUnitary(tmp, a[k*lda+k+1:k*lda+n], btmp[k+1:n]) + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for k := 0; k < n; k++ { + tmp := alpha * btmp[k] + if tmp == 0 { + continue + } + btmp[k] = tmp + if nonUnit { + btmp[k] *= a[k*lda+k] + } + f64.AxpyUnitary(tmp, a[k*lda:k*lda+k], btmp[:k]) + } + } + return + } + // Cases where a is transposed. 
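+	// b[i][j] becomes alpha times the dot product of row i of B with the stored
+	// part of row j of A.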
+ if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j, vb := range btmp { + tmp := vb + if nonUnit { + tmp *= a[j*lda+j] + } + tmp += f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n]) + btmp[j] = alpha * tmp + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := n - 1; j >= 0; j-- { + tmp := btmp[j] + if nonUnit { + tmp *= a[j*lda+j] + } + tmp += f64.DotUnitary(a[j*lda:j*lda+j], btmp[:j]) + btmp[j] = alpha * tmp + } + } +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/sgemm.go b/vendor/gonum.org/v1/gonum/blas/gonum/sgemm.go new file mode 100644 index 0000000000..7b03ce46a8 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/sgemm.go @@ -0,0 +1,301 @@ +// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. + +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "runtime" + "sync" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/f32" +) + +// Sgemm performs one of the matrix-matrix operations +// +// C = alpha * A * B + beta * C +// C = alpha * Aᵀ * B + beta * C +// C = alpha * A * Bᵀ + beta * C +// C = alpha * Aᵀ * Bᵀ + beta * C +// +// where A is an m×k or k×m dense matrix, B is an n×k or k×n dense matrix, C is +// an m×n matrix, and alpha and beta are scalars. tA and tB specify whether A or +// B are transposed. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { + switch tA { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + switch tB { + default: + panic(badTranspose) + case blas.NoTrans, blas.Trans, blas.ConjTrans: + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + aTrans := tA == blas.Trans || tA == blas.ConjTrans + if aTrans { + if lda < max(1, m) { + panic(badLdA) + } + } else { + if lda < max(1, k) { + panic(badLdA) + } + } + bTrans := tB == blas.Trans || tB == blas.ConjTrans + if bTrans { + if ldb < max(1, k) { + panic(badLdB) + } + } else { + if ldb < max(1, n) { + panic(badLdB) + } + } + if ldc < max(1, n) { + panic(badLdC) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + // For zero matrix size the following slice length checks are trivially satisfied. + if aTrans { + if len(a) < (k-1)*lda+m { + panic(shortA) + } + } else { + if len(a) < (m-1)*lda+k { + panic(shortA) + } + } + if bTrans { + if len(b) < (n-1)*ldb+k { + panic(shortB) + } + } else { + if len(b) < (k-1)*ldb+n { + panic(shortB) + } + } + if len(c) < (m-1)*ldc+n { + panic(shortC) + } + + // Quick return if possible. 
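+	// alpha == 0 or an empty inner dimension k leaves only beta*C, a no-op for beta == 1.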
+	if (alpha == 0 || k == 0) && beta == 1 {
+		return
+	}
+
+	// Scale c.
+	if beta != 1 {
+		if beta == 0 {
+			for i := 0; i < m; i++ {
+				ctmp := c[i*ldc : i*ldc+n]
+				for j := range ctmp {
+					ctmp[j] = 0
+				}
+			}
+		} else {
+			for i := 0; i < m; i++ {
+				ctmp := c[i*ldc : i*ldc+n]
+				for j := range ctmp {
+					ctmp[j] *= beta
+				}
+			}
+		}
+	}
+
+	sgemmParallel(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha)
+}
+
+func sgemmParallel(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
+	// sgemmParallel computes a parallel matrix multiplication by partitioning
+	// a and b into sub-blocks and updating c with the multiplication of the
+	// sub-blocks. In all cases,
+	//
+	//	A = [ A_11 A_12 ... A_1j
+	//	      A_21 A_22 ... A_2j
+	//	      ...
+	//	      A_i1 A_i2 ... A_ij ]
+	//
+	// and the same for B. All of the submatrix sizes are blockSize×blockSize except
+	// at the edges.
+	//
+	// In all cases, there is one dimension for each matrix along which
+	// C must be updated sequentially.
+	//
+	//	Cij = \sum_k Aik Bkj,	(A * B)
+	//	Cij = \sum_k Aki Bkj,	(Aᵀ * B)
+	//	Cij = \sum_k Aik Bjk,	(A * Bᵀ)
+	//	Cij = \sum_k Aki Bjk,	(Aᵀ * Bᵀ)
+	//
+	// This code computes one {i, j} block sequentially along the k dimension,
+	// and computes all of the {i, j} blocks concurrently. This
+	// partitioning allows Cij to be updated in-place without race conditions.
+	// Instead of launching a goroutine for each possible concurrent computation,
+	// a number of worker goroutines are created and channels are used to pass
+	// available and completed cases.
+	//
+	// http://alexkr.com/docs/matrixmult.pdf is a good reference on matrix-matrix
+	// multiplies, though this code does not copy matrices to attempt to eliminate
+	// cache misses.
+
+	maxKLen := k
+	parBlocks := blocks(m, blockSize) * blocks(n, blockSize)
+	if parBlocks < minParBlock {
+		// The matrix multiplication is small in the dimensions where it can be
+		// computed concurrently. Just do it in serial.
+		sgemmSerial(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha)
+		return
+	}
+
+	// workerLimit acts as a limit on the number of concurrent workers,
+	// with the limit set to the number of procs available.
+	workerLimit := make(chan struct{}, runtime.GOMAXPROCS(0))
+
+	// wg is used to wait for all workers to finish.
+	var wg sync.WaitGroup
+	wg.Add(parBlocks)
+	defer wg.Wait()
+
+	for i := 0; i < m; i += blockSize {
+		for j := 0; j < n; j += blockSize {
+			workerLimit <- struct{}{}
+			go func(i, j int) {
+				defer func() {
+					wg.Done()
+					<-workerLimit
+				}()
+
+				leni := blockSize
+				if i+leni > m {
+					leni = m - i
+				}
+				lenj := blockSize
+				if j+lenj > n {
+					lenj = n - j
+				}
+
+				cSub := sliceView32(c, ldc, i, j, leni, lenj)
+
+				// Compute A_ik B_kj for all k.
+				for k := 0; k < maxKLen; k += blockSize {
+					lenk := blockSize
+					if k+lenk > maxKLen {
+						lenk = maxKLen - k
+					}
+					var aSub, bSub []float32
+					if aTrans {
+						aSub = sliceView32(a, lda, k, i, lenk, leni)
+					} else {
+						aSub = sliceView32(a, lda, i, k, leni, lenk)
+					}
+					if bTrans {
+						bSub = sliceView32(b, ldb, j, k, lenj, lenk)
+					} else {
+						bSub = sliceView32(b, ldb, k, j, lenk, lenj)
+					}
+					sgemmSerial(aTrans, bTrans, leni, lenj, lenk, aSub, lda, bSub, ldb, cSub, ldc, alpha)
+				}
+			}(i, j)
+		}
+	}
+}
+
+// sgemmSerial is a serial matrix multiply dispatcher.
+func sgemmSerial(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
+	switch {
+	case !aTrans && !bTrans:
+		sgemmSerialNotNot(m, n, k, a, lda, b, ldb, c, ldc, alpha)
+		return
+	case aTrans && !bTrans:
+		sgemmSerialTransNot(m, n, k, a, lda, b, ldb, c, ldc, alpha)
+		return
+	case !aTrans && bTrans:
+		sgemmSerialNotTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha)
+		return
+	case aTrans && bTrans:
+		sgemmSerialTransTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha)
+		return
+	default:
+		panic("unreachable")
+	}
+}
+
+// sgemmSerialNotNot handles the case where neither a nor b is transposed.
+func sgemmSerialNotNot(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
+	// This subslicing style is used instead of the literal c[i*ldc+j] because it
+	// was approximately 5 times faster as of go 1.3.
+	for i := 0; i < m; i++ {
+		ctmp := c[i*ldc : i*ldc+n]
+		for l, v := range a[i*lda : i*lda+k] {
+			tmp := alpha * v
+			if tmp != 0 {
+				f32.AxpyUnitary(tmp, b[l*ldb:l*ldb+n], ctmp)
+			}
+		}
+	}
+}
+
+// sgemmSerialTransNot handles the case where a is transposed and b is not.
+func sgemmSerialTransNot(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
+	// This subslicing style is used instead of the literal c[i*ldc+j] because it
+	// was approximately 5 times faster as of go 1.3.
+	for l := 0; l < k; l++ {
+		btmp := b[l*ldb : l*ldb+n]
+		for i, v := range a[l*lda : l*lda+m] {
+			tmp := alpha * v
+			if tmp != 0 {
+				ctmp := c[i*ldc : i*ldc+n]
+				f32.AxpyUnitary(tmp, btmp, ctmp)
+			}
+		}
+	}
+}
+
+// sgemmSerialNotTrans handles the case where a is not transposed and b is.
+func sgemmSerialNotTrans(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
+	// This subslicing style is used instead of the literal c[i*ldc+j] because it
+	// was approximately 5 times faster as of go 1.3.
+	for i := 0; i < m; i++ {
+		atmp := a[i*lda : i*lda+k]
+		ctmp := c[i*ldc : i*ldc+n]
+		for j := 0; j < n; j++ {
+			ctmp[j] += alpha * f32.DotUnitary(atmp, b[j*ldb:j*ldb+k])
+		}
+	}
+}
+
+// sgemmSerialTransTrans handles the case where both a and b are transposed.
+func sgemmSerialTransTrans(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
+	// This subslicing style is used instead of the literal c[i*ldc+j] because it
+	// was approximately 5 times faster as of go 1.3.
+ for l := 0; l < k; l++ { + for i, v := range a[l*lda : l*lda+m] { + tmp := alpha * v + if tmp != 0 { + ctmp := c[i*ldc : i*ldc+n] + f32.AxpyInc(tmp, b[l:], ctmp, uintptr(n), uintptr(ldb), 1, 0, 0) + } + } + } +} + +func sliceView32(a []float32, lda, i, j, r, c int) []float32 { + return a[i*lda+j : (i+r-1)*lda+j+c] +} diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/single_precision.bash b/vendor/gonum.org/v1/gonum/blas/gonum/single_precision.bash new file mode 100644 index 0000000000..a107fce492 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/single_precision.bash @@ -0,0 +1,224 @@ +#!/usr/bin/env bash + +# Copyright ©2015 The Gonum Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +WARNINGF32='//\ +// Float32 implementations are autogenerated and not directly tested.\ +' +WARNINGC64='//\ +// Complex64 implementations are autogenerated and not directly tested.\ +' + +# Level1 routines. + +echo Generating level1float32.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level1float32.go +cat level1float64.go \ +| gofmt -r 'blas.Float64Level1 -> blas.Float32Level1' \ +\ +| gofmt -r 'float64 -> float32' \ +| gofmt -r 'blas.DrotmParams -> blas.SrotmParams' \ +\ +| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \ +| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \ +| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \ +| gofmt -r 'f64.L2NormInc -> f32.L2NormInc' \ +| gofmt -r 'f64.L2NormUnitary -> f32.L2NormUnitary' \ +| gofmt -r 'f64.ScalInc -> f32.ScalInc' \ +| gofmt -r 'f64.ScalUnitary -> f32.ScalUnitary' \ +\ +| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1S\2_" \ + -e 's_^// D_// S_' \ + -e "s_^\(func (Implementation) \)Id\(.*\)\$_$WARNINGF32\1Is\2_" \ + -e 's_^// Id_// Is_' \ + -e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \ + -e 's_"math"_math "gonum.org/v1/gonum/internal/math32"_' \ + -e 's_safmin = 0x1p-1022_safmin = 0x1p-126_' \ +>> level1float32.go + +echo Generating level1cmplx64.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level1cmplx64.go +cat level1cmplx128.go \ +| gofmt -r 'blas.Complex128Level1 -> blas.Complex64Level1' \ +\ +| gofmt -r 'float64 -> float32' \ +| gofmt -r 'complex128 -> complex64' \ +\ +| gofmt -r 'c128.AxpyInc -> c64.AxpyInc' \ +| gofmt -r 'c128.AxpyUnitary -> c64.AxpyUnitary' \ +| gofmt -r 'c128.DotcInc -> c64.DotcInc' \ +| gofmt -r 'c128.DotcUnitary -> c64.DotcUnitary' \ +| gofmt -r 'c128.DotuInc -> c64.DotuInc' \ +| gofmt -r 'c128.DotuUnitary -> c64.DotuUnitary' \ +| gofmt -r 'c128.ScalInc -> c64.ScalInc' \ +| gofmt -r 'c128.ScalUnitary -> c64.ScalUnitary' \ +| gofmt -r 'dcabs1 -> scabs1' \ +\ +| sed -e "s_^\(func (Implementation) \)Zdot\(.*\)\$_$WARNINGC64\1Cdot\2_" \ + -e 's_^// Zdot_// Cdot_' \ + -e "s_^\(func (Implementation) \)Zdscal\(.*\)\$_$WARNINGC64\1Csscal\2_" \ + -e 's_^// Zdscal_// Csscal_' \ + -e "s_^\(func (Implementation) \)Z\(.*\)\$_$WARNINGC64\1C\2_" \ + -e 's_^// Z_// C_' \ + -e "s_^\(func (Implementation) \)Iz\(.*\)\$_$WARNINGC64\1Ic\2_" \ + -e 's_^// Iz_// Ic_' \ + -e "s_^\(func (Implementation) \)Dz\(.*\)\$_$WARNINGC64\1Sc\2_" \ + -e 's_^// Dz_// Sc_' \ + -e 's_"gonum.org/v1/gonum/internal/asm/c128"_"gonum.org/v1/gonum/internal/asm/c64"_' \ + -e 's_"math"_math "gonum.org/v1/gonum/internal/math32"_' \ +>> level1cmplx64.go + +echo Generating level1float32_sdot.go +echo -e '// Code generated by "go generate 
gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level1float32_sdot.go +cat level1float64_ddot.go \ +| gofmt -r 'float64 -> float32' \ +\ +| gofmt -r 'f64.DotInc -> f32.DotInc' \ +| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \ +\ +| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1S\2_" \ + -e 's_^// D_// S_' \ + -e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \ +>> level1float32_sdot.go + +echo Generating level1float32_dsdot.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level1float32_dsdot.go +cat level1float64_ddot.go \ +| gofmt -r '[]float64 -> []float32' \ +\ +| gofmt -r 'f64.DotInc -> f32.DdotInc' \ +| gofmt -r 'f64.DotUnitary -> f32.DdotUnitary' \ +\ +| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1Ds\2_" \ + -e 's_^// D_// Ds_' \ + -e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \ +>> level1float32_dsdot.go + +echo Generating level1float32_sdsdot.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level1float32_sdsdot.go +cat level1float64_ddot.go \ +| gofmt -r 'float64 -> float32' \ +\ +| gofmt -r 'f64.DotInc(x, y, f(n), f(incX), f(incY), f(ix), f(iy)) -> alpha + float32(f32.DdotInc(x, y, f(n), f(incX), f(incY), f(ix), f(iy)))' \ +| gofmt -r 'f64.DotUnitary(a, b) -> alpha + float32(f32.DdotUnitary(a, b))' \ +\ +| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1Sds\2_" \ + -e 's_^// D\(.*\)$_// Sds\1 plus a constant_' \ + -e 's_\\sum_alpha + \\sum_' \ + -e 's/n int/n int, alpha float32/' \ + -e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \ +>> level1float32_sdsdot.go + + +# Level2 routines. + +echo Generating level2float32.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level2float32.go +cat level2float64.go \ +| gofmt -r 'blas.Float64Level2 -> blas.Float32Level2' \ +\ +| gofmt -r 'float64 -> float32' \ +\ +| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \ +| gofmt -r 'f64.AxpyIncTo -> f32.AxpyIncTo' \ +| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \ +| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \ +| gofmt -r 'f64.DotInc -> f32.DotInc' \ +| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \ +| gofmt -r 'f64.ScalInc -> f32.ScalInc' \ +| gofmt -r 'f64.ScalUnitary -> f32.ScalUnitary' \ +| gofmt -r 'f64.Ger -> f32.Ger' \ +| gofmt -r 'f64.GemvN -> f32.GemvN' \ +| gofmt -r 'f64.GemvT -> f32.GemvT' \ +| gofmt -r 'Implementation{}.Dscal -> Implementation{}.Sscal' \ +\ +| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1S\2_" \ + -e 's_^// D_// S_' \ + -e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \ +>> level2float32.go + +echo Generating level2cmplx64.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level2cmplx64.go +cat level2cmplx128.go \ +| gofmt -r 'blas.Complex128Level2 -> blas.Complex64Level2' \ +\ +| gofmt -r 'complex128 -> complex64' \ +| gofmt -r 'float64 -> float32' \ +\ +| gofmt -r 'c128.AxpyInc -> c64.AxpyInc' \ +| gofmt -r 'c128.AxpyUnitary -> c64.AxpyUnitary' \ +| gofmt -r 'c128.DotuInc -> c64.DotuInc' \ +| gofmt -r 'c128.DotuUnitary -> c64.DotuUnitary' \ +| gofmt -r 'c128.ScalInc -> c64.ScalInc' \ +| gofmt -r 'c128.ScalUnitary -> c64.ScalUnitary' \ +\ +| sed -e "s_^\(func (Implementation) \)Z\(.*\)\$_$WARNINGC64\1C\2_" \ + -e 's_^// Z_// C_' \ + -e 
's_"gonum.org/v1/gonum/internal/asm/c128"_"gonum.org/v1/gonum/internal/asm/c64"_' \ + -e 's_"math/cmplx"_cmplx "gonum.org/v1/gonum/internal/cmplx64"_' \ +>> level2cmplx64.go + +# Level3 routines. + +echo Generating level3float32.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level3float32.go +cat level3float64.go \ +| gofmt -r 'blas.Float64Level3 -> blas.Float32Level3' \ +\ +| gofmt -r 'float64 -> float32' \ +\ +| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \ +| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \ +| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \ +| gofmt -r 'f64.ScalUnitary -> f32.ScalUnitary' \ +\ +| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1S\2_" \ + -e 's_^// D_// S_' \ + -e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \ +>> level3float32.go + +echo Generating sgemm.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > sgemm.go +cat dgemm.go \ +| gofmt -r 'float64 -> float32' \ +| gofmt -r 'sliceView64 -> sliceView32' \ +\ +| gofmt -r 'dgemmParallel -> sgemmParallel' \ +| gofmt -r 'computeNumBlocks64 -> computeNumBlocks32' \ +| gofmt -r 'dgemmSerial -> sgemmSerial' \ +| gofmt -r 'dgemmSerialNotNot -> sgemmSerialNotNot' \ +| gofmt -r 'dgemmSerialTransNot -> sgemmSerialTransNot' \ +| gofmt -r 'dgemmSerialNotTrans -> sgemmSerialNotTrans' \ +| gofmt -r 'dgemmSerialTransTrans -> sgemmSerialTransTrans' \ +\ +| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \ +| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \ +| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \ +\ +| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1S\2_" \ + -e 's_^// D_// S_' \ + -e 's_^// d_// s_' \ + -e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \ +>> sgemm.go + +echo Generating level3cmplx64.go +echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level3cmplx64.go +cat level3cmplx128.go \ +| gofmt -r 'blas.Complex128Level3 -> blas.Complex64Level3' \ +\ +| gofmt -r 'float64 -> float32' \ +| gofmt -r 'complex128 -> complex64' \ +\ +| gofmt -r 'c128.ScalUnitary -> c64.ScalUnitary' \ +| gofmt -r 'c128.DscalUnitary -> c64.SscalUnitary' \ +| gofmt -r 'c128.DotcUnitary -> c64.DotcUnitary' \ +| gofmt -r 'c128.AxpyUnitary -> c64.AxpyUnitary' \ +| gofmt -r 'c128.DotuUnitary -> c64.DotuUnitary' \ +\ +| sed -e "s_^\(func (Implementation) \)Z\(.*\)\$_$WARNINGC64\1C\2_" \ + -e 's_^// Z_// C_' \ + -e 's_"gonum.org/v1/gonum/internal/asm/c128"_"gonum.org/v1/gonum/internal/asm/c64"_' \ + -e 's_"math/cmplx"_cmplx "gonum.org/v1/gonum/internal/cmplx64"_' \ +>> level3cmplx64.go diff --git a/vendor/gonum.org/v1/gonum/floats/README.md b/vendor/gonum.org/v1/gonum/floats/README.md new file mode 100644 index 0000000000..e8ef46d567 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/floats/README.md @@ -0,0 +1,7 @@ +# Gonum floats + +[![go.dev reference](https://pkg.go.dev/badge/gonum.org/v1/gonum/floats)](https://pkg.go.dev/gonum.org/v1/gonum/floats) +[![GoDoc](https://godocs.io/gonum.org/v1/gonum/floats?status.svg)](https://godocs.io/gonum.org/v1/gonum/floats) + +Package floats provides a set of helper routines for dealing with slices of float64. +The functions avoid allocations to allow for use within tight loops without garbage collection overhead. 
diff --git a/vendor/gonum.org/v1/gonum/floats/doc.go b/vendor/gonum.org/v1/gonum/floats/doc.go new file mode 100644 index 0000000000..bfe05c1918 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/floats/doc.go @@ -0,0 +1,11 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package floats provides a set of helper routines for dealing with slices +// of float64. The functions avoid allocations to allow for use within tight +// loops without garbage collection overhead. +// +// The convention used is that when a slice is being modified in place, it has +// the name dst. +package floats // import "gonum.org/v1/gonum/floats" diff --git a/vendor/gonum.org/v1/gonum/floats/floats.go b/vendor/gonum.org/v1/gonum/floats/floats.go new file mode 100644 index 0000000000..68c4e65c7e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/floats/floats.go @@ -0,0 +1,808 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package floats + +import ( + "errors" + "math" + "slices" + "sort" + + "gonum.org/v1/gonum/floats/scalar" + "gonum.org/v1/gonum/internal/asm/f64" +) + +const ( + zeroLength = "floats: zero length slice" + shortSpan = "floats: slice length less than 2" + badLength = "floats: slice lengths do not match" + badDstLength = "floats: destination slice length does not match input" +) + +// Add adds, element-wise, the elements of s and dst, and stores the result in dst. +// It panics if the argument lengths do not match. +func Add(dst, s []float64) { + if len(dst) != len(s) { + panic(badDstLength) + } + f64.AxpyUnitaryTo(dst, 1, s, dst) +} + +// AddTo adds, element-wise, the elements of s and t and +// stores the result in dst. +// It panics if the argument lengths do not match. +func AddTo(dst, s, t []float64) []float64 { + if len(s) != len(t) { + panic(badLength) + } + if len(dst) != len(s) { + panic(badDstLength) + } + f64.AxpyUnitaryTo(dst, 1, s, t) + return dst +} + +// AddConst adds the scalar c to all of the values in dst. +func AddConst(c float64, dst []float64) { + f64.AddConst(c, dst) +} + +// AddScaled performs dst = dst + alpha * s. +// It panics if the slice argument lengths do not match. +func AddScaled(dst []float64, alpha float64, s []float64) { + if len(dst) != len(s) { + panic(badLength) + } + f64.AxpyUnitaryTo(dst, alpha, s, dst) +} + +// AddScaledTo performs dst = y + alpha * s, where alpha is a scalar, +// and dst, y and s are all slices. +// It panics if the slice argument lengths do not match. +// +// At the return of the function, dst[i] = y[i] + alpha * s[i] +func AddScaledTo(dst, y []float64, alpha float64, s []float64) []float64 { + if len(s) != len(y) { + panic(badLength) + } + if len(dst) != len(y) { + panic(badDstLength) + } + f64.AxpyUnitaryTo(dst, alpha, s, y) + return dst +} + +// argsort is a helper that implements sort.Interface, as used by +// Argsort and ArgsortStable. +type argsort struct { + s []float64 + inds []int +} + +func (a argsort) Len() int { + return len(a.s) +} + +func (a argsort) Less(i, j int) bool { + return a.s[i] < a.s[j] +} + +func (a argsort) Swap(i, j int) { + a.s[i], a.s[j] = a.s[j], a.s[i] + a.inds[i], a.inds[j] = a.inds[j], a.inds[i] +} + +// Argsort sorts the elements of dst while tracking their original order. 
+// At the conclusion of Argsort, dst will contain the original elements of dst +// but sorted in increasing order, and inds will contain the original position +// of the elements in the slice such that dst[i] = origDst[inds[i]]. +// It panics if the argument lengths do not match. +func Argsort(dst []float64, inds []int) { + if len(dst) != len(inds) { + panic(badDstLength) + } + for i := range dst { + inds[i] = i + } + + a := argsort{s: dst, inds: inds} + sort.Sort(a) +} + +// ArgsortStable sorts the elements of dst while tracking their original order and +// keeping the original order of equal elements. At the conclusion of ArgsortStable, +// dst will contain the original elements of dst but sorted in increasing order, +// and inds will contain the original position of the elements in the slice such +// that dst[i] = origDst[inds[i]]. +// It panics if the argument lengths do not match. +func ArgsortStable(dst []float64, inds []int) { + if len(dst) != len(inds) { + panic(badDstLength) + } + for i := range dst { + inds[i] = i + } + + a := argsort{s: dst, inds: inds} + sort.Stable(a) +} + +// Count applies the function f to every element of s and returns the number +// of times the function returned true. +func Count(f func(float64) bool, s []float64) int { + var n int + for _, val := range s { + if f(val) { + n++ + } + } + return n +} + +// CumProd finds the cumulative product of the first i elements in +// s and puts them in place into the ith element of the +// destination dst. +// It panics if the argument lengths do not match. +// +// At the return of the function, dst[i] = s[i] * s[i-1] * s[i-2] * ... +func CumProd(dst, s []float64) []float64 { + if len(dst) != len(s) { + panic(badDstLength) + } + if len(dst) == 0 { + return dst + } + return f64.CumProd(dst, s) +} + +// CumSum finds the cumulative sum of the first i elements in +// s and puts them in place into the ith element of the +// destination dst. +// It panics if the argument lengths do not match. +// +// At the return of the function, dst[i] = s[i] + s[i-1] + s[i-2] + ... +func CumSum(dst, s []float64) []float64 { + if len(dst) != len(s) { + panic(badDstLength) + } + if len(dst) == 0 { + return dst + } + return f64.CumSum(dst, s) +} + +// Distance computes the L-norm of s - t. See Norm for special cases. +// It panics if the slice argument lengths do not match. +func Distance(s, t []float64, L float64) float64 { + if len(s) != len(t) { + panic(badLength) + } + if len(s) == 0 { + return 0 + } + if L == 2 { + return f64.L2DistanceUnitary(s, t) + } + var norm float64 + if L == 1 { + for i, v := range s { + norm += math.Abs(t[i] - v) + } + return norm + } + if math.IsInf(L, 1) { + for i, v := range s { + absDiff := math.Abs(t[i] - v) + if absDiff > norm { + norm = absDiff + } + } + return norm + } + for i, v := range s { + norm += math.Pow(math.Abs(t[i]-v), L) + } + return math.Pow(norm, 1/L) +} + +// Div performs element-wise division dst / s +// and stores the value in dst. +// It panics if the argument lengths do not match. +func Div(dst, s []float64) { + if len(dst) != len(s) { + panic(badLength) + } + f64.Div(dst, s) +} + +// DivTo performs element-wise division s / t +// and stores the value in dst. +// It panics if the argument lengths do not match. +func DivTo(dst, s, t []float64) []float64 { + if len(s) != len(t) { + panic(badLength) + } + if len(dst) != len(s) { + panic(badDstLength) + } + return f64.DivTo(dst, s, t) +} + +// Dot computes the dot product of s1 and s2, i.e. +// sum_{i = 1}^N s1[i]*s2[i]. 
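+//
+// For example, Dot([]float64{1, 2}, []float64{3, 4}) returns 1*3 + 2*4 = 11.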
+// It panics if the argument lengths do not match. +func Dot(s1, s2 []float64) float64 { + if len(s1) != len(s2) { + panic(badLength) + } + return f64.DotUnitary(s1, s2) +} + +// Equal returns true when the slices have equal lengths and +// all elements are numerically identical. +func Equal(s1, s2 []float64) bool { + if len(s1) != len(s2) { + return false + } + for i, val := range s1 { + if s2[i] != val { + return false + } + } + return true +} + +// EqualApprox returns true when the slices have equal lengths and +// all element pairs have an absolute tolerance less than tol or a +// relative tolerance less than tol. +func EqualApprox(s1, s2 []float64, tol float64) bool { + if len(s1) != len(s2) { + return false + } + for i, a := range s1 { + if !scalar.EqualWithinAbsOrRel(a, s2[i], tol, tol) { + return false + } + } + return true +} + +// EqualFunc returns true when the slices have the same lengths +// and the function returns true for all element pairs. +func EqualFunc(s1, s2 []float64, f func(float64, float64) bool) bool { + if len(s1) != len(s2) { + return false + } + for i, val := range s1 { + if !f(val, s2[i]) { + return false + } + } + return true +} + +// EqualLengths returns true when all of the slices have equal length, +// and false otherwise. It also returns true when there are no input slices. +func EqualLengths(slices ...[]float64) bool { + // This length check is needed: http://play.golang.org/p/sdty6YiLhM + if len(slices) == 0 { + return true + } + l := len(slices[0]) + for i := 1; i < len(slices); i++ { + if len(slices[i]) != l { + return false + } + } + return true +} + +// Find applies f to every element of s and returns the indices of the first +// k elements for which the f returns true, or all such elements +// if k < 0. +// Find will reslice inds to have 0 length, and will append +// found indices to inds. +// If k > 0 and there are fewer than k elements in s satisfying f, +// all of the found elements will be returned along with an error. +// At the return of the function, the input inds will be in an undetermined state. +func Find(inds []int, f func(float64) bool, s []float64, k int) ([]int, error) { + // inds is also returned to allow for calling with nil. + + // Reslice inds to have zero length. + inds = inds[:0] + + // If zero elements requested, can just return. + if k == 0 { + return inds, nil + } + + // If k < 0, return all of the found indices. + if k < 0 { + for i, val := range s { + if f(val) { + inds = append(inds, i) + } + } + return inds, nil + } + + // Otherwise, find the first k elements. + nFound := 0 + for i, val := range s { + if f(val) { + inds = append(inds, i) + nFound++ + if nFound == k { + return inds, nil + } + } + } + // Finished iterating over the loop, which means k elements were not found. + return inds, errors.New("floats: insufficient elements found") +} + +// HasNaN returns true when the slice s has any values that are NaN and false +// otherwise. +func HasNaN(s []float64) bool { + for _, v := range s { + if math.IsNaN(v) { + return true + } + } + return false +} + +// LogSpan returns a set of n equally spaced points in log space between, +// l and u where N is equal to len(dst). The first element of the +// resulting dst will be l and the final element of dst will be u. +// It panics if the length of dst is less than 2. +// Note that this call will return NaNs if either l or u are negative, and +// will return all zeros if l or u is zero. 
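+// For example, with len(dst) == 3, LogSpan(dst, 1, 100) fills dst with
+// approximately {1, 10, 100}.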
+// Also returns the mutated slice dst, so that it can be used in range, like: +// +// for i, x := range LogSpan(dst, l, u) { ... } +func LogSpan(dst []float64, l, u float64) []float64 { + Span(dst, math.Log(l), math.Log(u)) + for i := range dst { + dst[i] = math.Exp(dst[i]) + } + return dst +} + +// LogSumExp returns the log of the sum of the exponentials of the values in s. +// Panics if s is an empty slice. +func LogSumExp(s []float64) float64 { + // Want to do this in a numerically stable way which avoids + // overflow and underflow + // First, find the maximum value in the slice. + maxval := Max(s) + if math.IsInf(maxval, 0) { + // If it's infinity either way, the logsumexp will be infinity as well + // returning now avoids NaNs + return maxval + } + var lse float64 + // Compute the sumexp part + for _, val := range s { + lse += math.Exp(val - maxval) + } + // Take the log and add back on the constant taken out + return math.Log(lse) + maxval +} + +// Max returns the maximum value in the input slice. If the slice is empty, Max will panic. +func Max(s []float64) float64 { + return s[MaxIdx(s)] +} + +// MaxIdx returns the index of the maximum value in the input slice. If several +// entries have the maximum value, the first such index is returned. +// It panics if s is zero length. +func MaxIdx(s []float64) int { + if len(s) == 0 { + panic(zeroLength) + } + max := math.NaN() + var ind int + for i, v := range s { + if math.IsNaN(v) { + continue + } + if v > max || math.IsNaN(max) { + max = v + ind = i + } + } + return ind +} + +// Min returns the minimum value in the input slice. +// It panics if s is zero length. +func Min(s []float64) float64 { + return s[MinIdx(s)] +} + +// MinIdx returns the index of the minimum value in the input slice. If several +// entries have the minimum value, the first such index is returned. +// It panics if s is zero length. +func MinIdx(s []float64) int { + if len(s) == 0 { + panic(zeroLength) + } + min := math.NaN() + var ind int + for i, v := range s { + if math.IsNaN(v) { + continue + } + if v < min || math.IsNaN(min) { + min = v + ind = i + } + } + return ind +} + +// Mul performs element-wise multiplication between dst +// and s and stores the value in dst. +// It panics if the argument lengths do not match. +func Mul(dst, s []float64) { + if len(dst) != len(s) { + panic(badLength) + } + for i, val := range s { + dst[i] *= val + } +} + +// MulTo performs element-wise multiplication between s +// and t and stores the value in dst. +// It panics if the argument lengths do not match. +func MulTo(dst, s, t []float64) []float64 { + if len(s) != len(t) { + panic(badLength) + } + if len(dst) != len(s) { + panic(badDstLength) + } + for i, val := range t { + dst[i] = val * s[i] + } + return dst +} + +// NearestIdx returns the index of the element in s +// whose value is nearest to v. If several such +// elements exist, the lowest index is returned. +// It panics if s is zero length. +func NearestIdx(s []float64, v float64) int { + if len(s) == 0 { + panic(zeroLength) + } + switch { + case math.IsNaN(v): + return 0 + case math.IsInf(v, 1): + return MaxIdx(s) + case math.IsInf(v, -1): + return MinIdx(s) + } + var ind int + dist := math.NaN() + for i, val := range s { + newDist := math.Abs(v - val) + // A NaN distance will not be closer. 
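+		// (v is known to be finite at this point, so newDist is NaN
+		// exactly when s[i] is NaN.)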
+ if math.IsNaN(newDist) { + continue + } + if newDist < dist || math.IsNaN(dist) { + dist = newDist + ind = i + } + } + return ind +} + +// NearestIdxForSpan return the index of a hypothetical vector created +// by Span with length n and bounds l and u whose value is closest +// to v. That is, NearestIdxForSpan(n, l, u, v) is equivalent to +// Nearest(Span(make([]float64, n),l,u),v) without an allocation. +// It panics if n is less than two. +func NearestIdxForSpan(n int, l, u float64, v float64) int { + if n < 2 { + panic(shortSpan) + } + if math.IsNaN(v) { + return 0 + } + + // Special cases for Inf and NaN. + switch { + case math.IsNaN(l) && !math.IsNaN(u): + return n - 1 + case math.IsNaN(u): + return 0 + case math.IsInf(l, 0) && math.IsInf(u, 0): + if l == u { + return 0 + } + if n%2 == 1 { + if !math.IsInf(v, 0) { + return n / 2 + } + if math.Copysign(1, v) == math.Copysign(1, l) { + return 0 + } + return n/2 + 1 + } + if math.Copysign(1, v) == math.Copysign(1, l) { + return 0 + } + return n / 2 + case math.IsInf(l, 0): + if v == l { + return 0 + } + return n - 1 + case math.IsInf(u, 0): + if v == u { + return n - 1 + } + return 0 + case math.IsInf(v, -1): + if l <= u { + return 0 + } + return n - 1 + case math.IsInf(v, 1): + if u <= l { + return 0 + } + return n - 1 + } + + // Special cases for v outside (l, u) and (u, l). + switch { + case l < u: + if v <= l { + return 0 + } + if v >= u { + return n - 1 + } + case l > u: + if v >= l { + return 0 + } + if v <= u { + return n - 1 + } + default: + return 0 + } + + // Can't guarantee anything about exactly halfway between + // because of floating point weirdness. + return int((float64(n)-1)/(u-l)*(v-l) + 0.5) +} + +// Norm returns the L norm of the slice S, defined as +// (sum_{i=1}^N s[i]^L)^{1/L} +// Special cases: +// L = math.Inf(1) gives the maximum absolute value. +// Does not correctly compute the zero norm (use Count). +func Norm(s []float64, L float64) float64 { + // Should this complain if L is not positive? + // Should this be done in log space for better numerical stability? + // would be more cost + // maybe only if L is high? + if len(s) == 0 { + return 0 + } + if L == 2 { + return f64.L2NormUnitary(s) + } + var norm float64 + if L == 1 { + for _, val := range s { + norm += math.Abs(val) + } + return norm + } + if math.IsInf(L, 1) { + for _, val := range s { + norm = math.Max(norm, math.Abs(val)) + } + return norm + } + for _, val := range s { + norm += math.Pow(math.Abs(val), L) + } + return math.Pow(norm, 1/L) +} + +// Prod returns the product of the elements of the slice. +// Returns 1 if len(s) = 0. +func Prod(s []float64) float64 { + prod := 1.0 + for _, val := range s { + prod *= val + } + return prod +} + +// Reverse reverses the order of elements in the slice. +// +// Deprecated: This function simply calls [slices.Reverse]. +func Reverse(s []float64) { + slices.Reverse(s) +} + +// Same returns true when the input slices have the same length and all +// elements have the same value with NaN treated as the same. +func Same(s, t []float64) bool { + if len(s) != len(t) { + return false + } + for i, v := range s { + w := t[i] + if v != w && !(math.IsNaN(v) && math.IsNaN(w)) { + return false + } + } + return true +} + +// Scale multiplies every element in dst by the scalar c. +func Scale(c float64, dst []float64) { + if len(dst) > 0 { + f64.ScalUnitary(c, dst) + } +} + +// ScaleTo multiplies the elements in s by c and stores the result in dst. +// It panics if the slice argument lengths do not match. 
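+//
+// For example, assuming len(dst) == len(s):
+//
+//	ScaleTo(dst, 2, s) // dst[i] == 2 * s[i]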
+func ScaleTo(dst []float64, c float64, s []float64) []float64 { + if len(dst) != len(s) { + panic(badDstLength) + } + if len(dst) > 0 { + f64.ScalUnitaryTo(dst, c, s) + } + return dst +} + +// Span returns a set of N equally spaced points between l and u, where N +// is equal to the length of the destination. The first element of the destination +// is l, the final element of the destination is u. +// It panics if the length of dst is less than 2. +// +// Span also returns the mutated slice dst, so that it can be used in range expressions, +// like: +// +// for i, x := range Span(dst, l, u) { ... } +func Span(dst []float64, l, u float64) []float64 { + n := len(dst) + if n < 2 { + panic(shortSpan) + } + + // Special cases for Inf and NaN. + switch { + case math.IsNaN(l): + for i := range dst[:len(dst)-1] { + dst[i] = math.NaN() + } + dst[len(dst)-1] = u + return dst + case math.IsNaN(u): + for i := range dst[1:] { + dst[i+1] = math.NaN() + } + dst[0] = l + return dst + case math.IsInf(l, 0) && math.IsInf(u, 0): + for i := range dst[:len(dst)/2] { + dst[i] = l + dst[len(dst)-i-1] = u + } + if len(dst)%2 == 1 { + if l != u { + dst[len(dst)/2] = 0 + } else { + dst[len(dst)/2] = l + } + } + return dst + case math.IsInf(l, 0): + for i := range dst[:len(dst)-1] { + dst[i] = l + } + dst[len(dst)-1] = u + return dst + case math.IsInf(u, 0): + for i := range dst[1:] { + dst[i+1] = u + } + dst[0] = l + return dst + } + + step := (u - l) / float64(n-1) + for i := range dst { + dst[i] = l + step*float64(i) + } + return dst +} + +// Sub subtracts, element-wise, the elements of s from dst. +// It panics if the argument lengths do not match. +func Sub(dst, s []float64) { + if len(dst) != len(s) { + panic(badLength) + } + f64.AxpyUnitaryTo(dst, -1, s, dst) +} + +// SubTo subtracts, element-wise, the elements of t from s and +// stores the result in dst. +// It panics if the argument lengths do not match. +func SubTo(dst, s, t []float64) []float64 { + if len(s) != len(t) { + panic(badLength) + } + if len(dst) != len(s) { + panic(badDstLength) + } + f64.AxpyUnitaryTo(dst, -1, t, s) + return dst +} + +// Sum returns the sum of the elements of the slice. +func Sum(s []float64) float64 { + return f64.Sum(s) +} + +// Within returns the first index i where s[i] <= v < s[i+1]. Within panics if: +// - len(s) < 2 +// - s is not sorted +func Within(s []float64, v float64) int { + if len(s) < 2 { + panic(shortSpan) + } + if !sort.Float64sAreSorted(s) { + panic("floats: input slice not sorted") + } + if v < s[0] || v >= s[len(s)-1] || math.IsNaN(v) { + return -1 + } + for i, f := range s[1:] { + if v < f { + return i + } + } + return -1 +} + +// SumCompensated returns the sum of the elements of the slice calculated with greater +// accuracy than Sum at the expense of additional computation. +func SumCompensated(s []float64) float64 { + // SumCompensated uses an improved version of Kahan's compensated + // summation algorithm proposed by Neumaier. + // See https://en.wikipedia.org/wiki/Kahan_summation_algorithm for details. + var sum, c float64 + for _, x := range s { + // This type conversion is here to prevent a sufficiently smart compiler + // from optimising away these operations. 
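+		// t is the naive running sum; the branch below recovers the low-order
+		// bits that the smaller-magnitude operand loses in this addition and
+		// accumulates them into the compensation term c.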
+ t := float64(sum + x) + if math.Abs(sum) >= math.Abs(x) { + c += (sum - t) + x + } else { + c += (x - t) + sum + } + sum = t + } + return sum + c +} diff --git a/vendor/gonum.org/v1/gonum/floats/scalar/doc.go b/vendor/gonum.org/v1/gonum/floats/scalar/doc.go new file mode 100644 index 0000000000..9e69c193e2 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/floats/scalar/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package scalar provides a set of helper routines for dealing with float64 values. +package scalar // import "gonum.org/v1/gonum/floats/scalar" diff --git a/vendor/gonum.org/v1/gonum/floats/scalar/scalar.go b/vendor/gonum.org/v1/gonum/floats/scalar/scalar.go new file mode 100644 index 0000000000..46bf06b353 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/floats/scalar/scalar.go @@ -0,0 +1,171 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package scalar + +import ( + "math" + "strconv" +) + +// EqualWithinAbs returns true when a and b have an absolute difference +// not greater than tol. +func EqualWithinAbs(a, b, tol float64) bool { + return a == b || math.Abs(a-b) <= tol +} + +// minNormalFloat64 is the smallest normal number. For 64 bit IEEE-754 +// floats this is 2^{-1022}. +const minNormalFloat64 = 0x1p-1022 + +// EqualWithinRel returns true when the difference between a and b +// is not greater than tol times the greater absolute value of a and b, +// +// abs(a-b) <= tol * max(abs(a), abs(b)). +func EqualWithinRel(a, b, tol float64) bool { + if a == b { + return true + } + delta := math.Abs(a - b) + if delta <= minNormalFloat64 { + return delta <= tol*minNormalFloat64 + } + // We depend on the division in this relationship to identify + // infinities (we rely on the NaN to fail the test) otherwise + // we compare Infs of the same sign and evaluate Infs as equal + // independent of sign. + return delta/math.Max(math.Abs(a), math.Abs(b)) <= tol +} + +// EqualWithinAbsOrRel returns true when a and b are equal to within +// the absolute or relative tolerances. See EqualWithinAbs and +// EqualWithinRel for details. +func EqualWithinAbsOrRel(a, b, absTol, relTol float64) bool { + return EqualWithinAbs(a, b, absTol) || EqualWithinRel(a, b, relTol) +} + +// EqualWithinULP returns true when a and b are equal to within +// the specified number of floating point units in the last place. +func EqualWithinULP(a, b float64, ulp uint) bool { + if a == b { + return true + } + if math.IsNaN(a) || math.IsNaN(b) { + return false + } + if math.Signbit(a) != math.Signbit(b) { + return math.Float64bits(math.Abs(a))+math.Float64bits(math.Abs(b)) <= uint64(ulp) + } + return ulpDiff(math.Float64bits(a), math.Float64bits(b)) <= uint64(ulp) +} + +func ulpDiff(a, b uint64) uint64 { + if a > b { + return a - b + } + return b - a +} + +const ( + nanBits = 0x7ff8000000000000 + nanMask = 0xfff8000000000000 +) + +// NaNWith returns an IEEE 754 "quiet not-a-number" value with the +// payload specified in the low 51 bits of payload. +// The NaN returned by math.NaN has a bit pattern equal to NaNWith(1). +func NaNWith(payload uint64) float64 { + return math.Float64frombits(nanBits | (payload &^ nanMask)) +} + +// NaNPayload returns the lowest 51 bits payload of an IEEE 754 "quiet +// not-a-number". 
For values of f other than quiet-NaN, NaNPayload +// returns zero and false. +func NaNPayload(f float64) (payload uint64, ok bool) { + b := math.Float64bits(f) + if b&nanBits != nanBits { + return 0, false + } + return b &^ nanMask, true +} + +// ParseWithNA converts the string s to a float64 in value. +// If s equals missing, weight is returned as 0, otherwise 1. +func ParseWithNA(s, missing string) (value, weight float64, err error) { + if s == missing { + return 0, 0, nil + } + value, err = strconv.ParseFloat(s, 64) + if err == nil { + weight = 1 + } + return value, weight, err +} + +// Round returns the half away from zero rounded value of x with prec precision. +// +// Special cases are: +// +// Round(±0) = +0 +// Round(±Inf) = ±Inf +// Round(NaN) = NaN +func Round(x float64, prec int) float64 { + if x == 0 { + // Make sure zero is returned + // without the negative bit set. + return 0 + } + // Fast path for positive precision on integers. + if prec >= 0 && x == math.Trunc(x) { + return x + } + pow := math.Pow10(prec) + intermed := x * pow + if math.IsInf(intermed, 0) { + return x + } + x = math.Round(intermed) + + if x == 0 { + return 0 + } + + return x / pow +} + +// RoundEven returns the half even rounded value of x with prec precision. +// +// Special cases are: +// +// RoundEven(±0) = +0 +// RoundEven(±Inf) = ±Inf +// RoundEven(NaN) = NaN +func RoundEven(x float64, prec int) float64 { + if x == 0 { + // Make sure zero is returned + // without the negative bit set. + return 0 + } + // Fast path for positive precision on integers. + if prec >= 0 && x == math.Trunc(x) { + return x + } + pow := math.Pow10(prec) + intermed := x * pow + if math.IsInf(intermed, 0) { + return x + } + x = math.RoundToEven(intermed) + + if x == 0 { + return 0 + } + + return x / pow +} + +// Same returns true when the inputs have the same value, allowing NaN equality. +func Same(a, b float64) bool { + return a == b || (math.IsNaN(a) && math.IsNaN(b)) +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyinc_amd64.s new file mode 100644 index 0000000000..d9b71a0d6b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyinc_amd64.s @@ -0,0 +1,134 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
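+//
+// This file implements AxpyInc, y[i] = alpha*x[i] + y[i] over strided
+// complex128 vectors. The SSE3 instructions MOVDDUP and ADDSUBPD are
+// emitted as raw BYTE sequences via the macros below.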
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// MOVDDUP X2, X3 +#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA +// MOVDDUP X4, X5 +#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC +// MOVDDUP X6, X7 +#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE +// MOVDDUP X8, X9 +#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8 + +// ADDSUBPD X2, X3 +#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA +// ADDSUBPD X4, X5 +#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC +// ADDSUBPD X6, X7 +#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE +// ADDSUBPD X8, X9 +#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8 + +// func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) +TEXT ·AxpyInc(SB), NOSPLIT, $0 + MOVQ x_base+16(FP), SI // SI = &x + MOVQ y_base+40(FP), DI // DI = &y + MOVQ n+64(FP), CX // CX = n + CMPQ CX, $0 // if n==0 { return } + JE axpyi_end + MOVQ ix+88(FP), R8 // R8 = ix // Load the first index + SHLQ $4, R8 // R8 *= sizeof(complex128) + MOVQ iy+96(FP), R9 // R9 = iy + SHLQ $4, R9 // R9 *= sizeof(complex128) + LEAQ (SI)(R8*1), SI // SI = &(x[ix]) + LEAQ (DI)(R9*1), DI // DI = &(y[iy]) + MOVQ DI, DX // DX = DI // Separate Read/Write pointers + MOVQ incX+72(FP), R8 // R8 = incX + SHLQ $4, R8 // R8 *= sizeof(complex128) + MOVQ incY+80(FP), R9 // R9 = iy + SHLQ $4, R9 // R9 *= sizeof(complex128) + MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) } + MOVAPS X0, X1 + SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) } + MOVAPS X0, X10 // Copy X0 and X1 for pipelining + MOVAPS X1, X11 + MOVQ CX, BX + ANDQ $3, CX // CX = n % 4 + SHRQ $2, BX // BX = floor( n / 4 ) + JZ axpyi_tail // if BX == 0 { goto axpyi_tail } + +axpyi_loop: // do { + MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) } + MOVUPS (SI)(R8*1), X4 + LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2]) + MOVUPS (SI), X6 + MOVUPS (SI)(R8*1), X8 + + // X_(i+1) = { real(x[i], real(x[i]) } + MOVDDUP_X2_X3 + MOVDDUP_X4_X5 + MOVDDUP_X6_X7 + MOVDDUP_X8_X9 + + // X_i = { imag(x[i]), imag(x[i]) } + SHUFPD $0x3, X2, X2 + SHUFPD $0x3, X4, X4 + SHUFPD $0x3, X6, X6 + SHUFPD $0x3, X8, X8 + + // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + MULPD X1, X2 + MULPD X0, X3 + MULPD X11, X4 + MULPD X10, X5 + MULPD X1, X6 + MULPD X0, X7 + MULPD X11, X8 + MULPD X10, X9 + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDSUBPD_X4_X5 + ADDSUBPD_X6_X7 + ADDSUBPD_X8_X9 + + // X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) } + ADDPD (DX), X3 + ADDPD (DX)(R9*1), X5 + LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2]) + ADDPD (DX), X7 + ADDPD (DX)(R9*1), X9 + MOVUPS X3, (DI) // dst[i] = X_(i+1) + MOVUPS X5, (DI)(R9*1) + LEAQ (DI)(R9*2), DI + MOVUPS X7, (DI) + MOVUPS X9, (DI)(R9*1) + LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2]) + LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2]) + LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2]) + DECQ BX + JNZ axpyi_loop // } while --BX > 0 + CMPQ CX, $0 // if CX == 0 { return } + JE axpyi_end + +axpyi_tail: // do { + MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) } + MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) } + SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) } + MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * 
imag(x[i]) } + MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + + // X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) } + ADDPD (DI), X3 + MOVUPS X3, (DI) // y[i] = X_i + ADDQ R8, SI // SI = &(SI[incX]) + ADDQ R9, DI // DI = &(DI[incY]) + LOOP axpyi_tail // } while --CX > 0 + +axpyi_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyincto_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyincto_amd64.s new file mode 100644 index 0000000000..d35e95d982 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyincto_amd64.s @@ -0,0 +1,141 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// MOVDDUP X2, X3 +#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA +// MOVDDUP X4, X5 +#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC +// MOVDDUP X6, X7 +#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE +// MOVDDUP X8, X9 +#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8 + +// ADDSUBPD X2, X3 +#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA +// ADDSUBPD X4, X5 +#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC +// ADDSUBPD X6, X7 +#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE +// ADDSUBPD X8, X9 +#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8 + +// func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) +TEXT ·AxpyIncTo(SB), NOSPLIT, $0 + MOVQ dst_base+0(FP), DI // DI = &dst + MOVQ x_base+56(FP), SI // SI = &x + MOVQ y_base+80(FP), DX // DX = &y + MOVQ n+104(FP), CX // CX = n + CMPQ CX, $0 // if n==0 { return } + JE axpyi_end + MOVQ ix+128(FP), R8 // R8 = ix // Load the first index + SHLQ $4, R8 // R8 *= sizeof(complex128) + MOVQ iy+136(FP), R9 // R9 = iy + SHLQ $4, R9 // R9 *= sizeof(complex128) + MOVQ idst+32(FP), R10 // R10 = idst + SHLQ $4, R10 // R10 *= sizeof(complex128) + LEAQ (SI)(R8*1), SI // SI = &(x[ix]) + LEAQ (DX)(R9*1), DX // DX = &(y[iy]) + LEAQ (DI)(R10*1), DI // DI = &(dst[idst]) + MOVQ incX+112(FP), R8 // R8 = incX + SHLQ $4, R8 // R8 *= sizeof(complex128) + MOVQ incY+120(FP), R9 // R9 = incY + SHLQ $4, R9 // R9 *= sizeof(complex128) + MOVQ incDst+24(FP), R10 // R10 = incDst + SHLQ $4, R10 // R10 *= sizeof(complex128) + MOVUPS alpha+40(FP), X0 // X0 = { imag(a), real(a) } + MOVAPS X0, X1 + SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) } + MOVAPS X0, X10 // Copy X0 and X1 for pipelining + MOVAPS X1, X11 + MOVQ CX, BX + ANDQ $3, CX // CX = n % 4 + SHRQ $2, BX // BX = floor( n / 4 ) + JZ axpyi_tail // if BX == 0 { goto axpyi_tail } + +axpyi_loop: // do { + MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) } + MOVUPS (SI)(R8*1), X4 + LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2]) + + MOVUPS (SI), X6 + MOVUPS (SI)(R8*1), X8 + + // X_(i+1) = { real(x[i], real(x[i]) } + MOVDDUP_X2_X3 + MOVDDUP_X4_X5 + MOVDDUP_X6_X7 + MOVDDUP_X8_X9 + + // X_i = { imag(x[i]), imag(x[i]) } + SHUFPD $0x3, X2, X2 + SHUFPD $0x3, X4, X4 + SHUFPD $0x3, X6, X6 + SHUFPD $0x3, X8, X8 + + // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + 
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + MULPD X1, X2 + MULPD X0, X3 + MULPD X11, X4 + MULPD X10, X5 + MULPD X1, X6 + MULPD X0, X7 + MULPD X11, X8 + MULPD X10, X9 + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDSUBPD_X4_X5 + ADDSUBPD_X6_X7 + ADDSUBPD_X8_X9 + + // X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) } + ADDPD (DX), X3 + ADDPD (DX)(R9*1), X5 + LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2]) + ADDPD (DX), X7 + ADDPD (DX)(R9*1), X9 + MOVUPS X3, (DI) // dst[i] = X_(i+1) + MOVUPS X5, (DI)(R10*1) + LEAQ (DI)(R10*2), DI + MOVUPS X7, (DI) + MOVUPS X9, (DI)(R10*1) + LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2]) + LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2]) + LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2]) + DECQ BX + JNZ axpyi_loop // } while --BX > 0 + CMPQ CX, $0 // if CX == 0 { return } + JE axpyi_end + +axpyi_tail: // do { + MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) } + MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) } + SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) } + MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + + // X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) } + ADDPD (DX), X3 + MOVUPS X3, (DI) // y[i] X_(i+1) + ADDQ R8, SI // SI += incX + ADDQ R9, DX // DX += incY + ADDQ R10, DI // DI += incDst + LOOP axpyi_tail // } while --CX > 0 + +axpyi_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitary_amd64.s new file mode 100644 index 0000000000..a6783255fd --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitary_amd64.s @@ -0,0 +1,122 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// MOVDDUP X2, X3 +#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA +// MOVDDUP X4, X5 +#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC +// MOVDDUP X6, X7 +#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE +// MOVDDUP X8, X9 +#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8 + +// ADDSUBPD X2, X3 +#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA +// ADDSUBPD X4, X5 +#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC +// ADDSUBPD X6, X7 +#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE +// ADDSUBPD X8, X9 +#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8 + +// func AxpyUnitary(alpha complex128, x, y []complex128) +TEXT ·AxpyUnitary(SB), NOSPLIT, $0 + MOVQ x_base+16(FP), SI // SI = &x + MOVQ y_base+40(FP), DI // DI = &y + MOVQ x_len+24(FP), CX // CX = min( len(x), len(y) ) + CMPQ y_len+48(FP), CX + CMOVQLE y_len+48(FP), CX + CMPQ CX, $0 // if CX == 0 { return } + JE caxy_end + PXOR X0, X0 // Clear work registers and cache-align loop + PXOR X1, X1 + MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) } + MOVAPS X0, X1 + SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) } + XORQ AX, AX // i = 0 + MOVAPS X0, X10 // Copy X0 and X1 for pipelining + MOVAPS X1, X11 + MOVQ CX, BX + ANDQ $3, CX // CX = n % 4 + SHRQ $2, BX // BX = floor( n / 4 ) + JZ caxy_tail // if BX == 0 { goto caxy_tail } + +caxy_loop: // do { + MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) } + MOVUPS 16(SI)(AX*8), X4 + MOVUPS 32(SI)(AX*8), X6 + MOVUPS 48(SI)(AX*8), X8 + + // X_(i+1) = { real(x[i], real(x[i]) } + MOVDDUP_X2_X3 + MOVDDUP_X4_X5 + MOVDDUP_X6_X7 + MOVDDUP_X8_X9 + + // X_i = { imag(x[i]), imag(x[i]) } + SHUFPD $0x3, X2, X2 + SHUFPD $0x3, X4, X4 + SHUFPD $0x3, X6, X6 + SHUFPD $0x3, X8, X8 + + // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + MULPD X1, X2 + MULPD X0, X3 + MULPD X11, X4 + MULPD X10, X5 + MULPD X1, X6 + MULPD X0, X7 + MULPD X11, X8 + MULPD X10, X9 + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDSUBPD_X4_X5 + ADDSUBPD_X6_X7 + ADDSUBPD_X8_X9 + + // X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) } + ADDPD (DI)(AX*8), X3 + ADDPD 16(DI)(AX*8), X5 + ADDPD 32(DI)(AX*8), X7 + ADDPD 48(DI)(AX*8), X9 + MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1) + MOVUPS X5, 16(DI)(AX*8) + MOVUPS X7, 32(DI)(AX*8) + MOVUPS X9, 48(DI)(AX*8) + ADDQ $8, AX // i += 8 + DECQ BX + JNZ caxy_loop // } while --BX > 0 + CMPQ CX, $0 // if CX == 0 { return } + JE caxy_end + +caxy_tail: // do { + MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) } + MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) } + SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) } + MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + + // X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) } + ADDPD (DI)(AX*8), X3 + MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1) + ADDQ $2, AX // i += 2 + LOOP caxy_tail // } while --CX > 0 + +caxy_end: + 
RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s new file mode 100644 index 0000000000..64add6886c --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s @@ -0,0 +1,123 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// MOVDDUP X2, X3 +#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA +// MOVDDUP X4, X5 +#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC +// MOVDDUP X6, X7 +#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE +// MOVDDUP X8, X9 +#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8 + +// ADDSUBPD X2, X3 +#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA +// ADDSUBPD X4, X5 +#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC +// ADDSUBPD X6, X7 +#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE +// ADDSUBPD X8, X9 +#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8 + +// func AxpyUnitaryTo(dst []complex128, alpha complex64, x, y []complex128) +TEXT ·AxpyUnitaryTo(SB), NOSPLIT, $0 + MOVQ dst_base+0(FP), DI // DI = &dst + MOVQ x_base+40(FP), SI // SI = &x + MOVQ y_base+64(FP), DX // DX = &y + MOVQ x_len+48(FP), CX // CX = min( len(x), len(y), len(dst) ) + CMPQ y_len+72(FP), CX + CMOVQLE y_len+72(FP), CX + CMPQ dst_len+8(FP), CX + CMOVQLE dst_len+8(FP), CX + CMPQ CX, $0 // if CX == 0 { return } + JE caxy_end + MOVUPS alpha+24(FP), X0 // X0 = { imag(a), real(a) } + MOVAPS X0, X1 + SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) } + XORQ AX, AX // i = 0 + MOVAPS X0, X10 // Copy X0 and X1 for pipelining + MOVAPS X1, X11 + MOVQ CX, BX + ANDQ $3, CX // CX = n % 4 + SHRQ $2, BX // BX = floor( n / 4 ) + JZ caxy_tail // if BX == 0 { goto caxy_tail } + +caxy_loop: // do { + MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) } + MOVUPS 16(SI)(AX*8), X4 + MOVUPS 32(SI)(AX*8), X6 + MOVUPS 48(SI)(AX*8), X8 + + // X_(i+1) = { real(x[i], real(x[i]) } + MOVDDUP_X2_X3 // Load and duplicate imag elements (xi, xi) + MOVDDUP_X4_X5 + MOVDDUP_X6_X7 + MOVDDUP_X8_X9 + + // X_i = { imag(x[i]), imag(x[i]) } + SHUFPD $0x3, X2, X2 // duplicate real elements (xr, xr) + SHUFPD $0x3, X4, X4 + SHUFPD $0x3, X6, X6 + SHUFPD $0x3, X8, X8 + + // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + MULPD X1, X2 + MULPD X0, X3 + MULPD X11, X4 + MULPD X10, X5 + MULPD X1, X6 + MULPD X0, X7 + MULPD X11, X8 + MULPD X10, X9 + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDSUBPD_X4_X5 + ADDSUBPD_X6_X7 + ADDSUBPD_X8_X9 + + // X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) } + ADDPD (DX)(AX*8), X3 + ADDPD 16(DX)(AX*8), X5 + ADDPD 32(DX)(AX*8), X7 + ADDPD 48(DX)(AX*8), X9 + MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1) + MOVUPS X5, 16(DI)(AX*8) + MOVUPS X7, 32(DI)(AX*8) + MOVUPS X9, 48(DI)(AX*8) + ADDQ $8, AX // i += 8 + DECQ BX + JNZ caxy_loop // } while --BX > 0 + CMPQ CX, $0 // if CX == 0 { return } + JE caxy_end + +caxy_tail: // Same calculation, but read in values to avoid trampling memory + MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), 
real(x[i]) } + MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) } + SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) } + MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + + // X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) } + ADDPD (DX)(AX*8), X3 + MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1) + ADDQ $2, AX // i += 2 + LOOP caxy_tail // } while --CX > 0 + +caxy_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/doc.go b/vendor/gonum.org/v1/gonum/internal/asm/c128/doc.go new file mode 100644 index 0000000000..8802ff138a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package c128 provides complex128 vector primitives. +package c128 // import "gonum.org/v1/gonum/internal/asm/c128" diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/dotcinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotcinc_amd64.s new file mode 100644 index 0000000000..235f67e7a2 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotcinc_amd64.s @@ -0,0 +1,153 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3 +#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5 +#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7 +#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9 + +#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2 +#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4 +#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6 +#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8 + +#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3 +#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5 +#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7 +#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9 + +#define X_PTR SI +#define Y_PTR DI +#define LEN CX +#define TAIL BX +#define SUM X0 +#define P_SUM X1 +#define INC_X R8 +#define INCx3_X R9 +#define INC_Y R10 +#define INCx3_Y R11 +#define NEG1 X15 +#define P_NEG1 X14 + +// func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) +TEXT ·DotcInc(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y + MOVQ n+48(FP), LEN // LEN = n + PXOR SUM, SUM // SUM = 0 + CMPQ LEN, $0 // if LEN == 0 { return } + JE dot_end + PXOR P_SUM, P_SUM // P_SUM = 0 + MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128) + SHLQ $4, INC_X + MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128) + SHLQ $4, INC_Y + LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix]) + LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy]) + MOVQ incX+56(FP), INC_X // 
INC_X = incX + SHLQ $4, INC_X // INC_X *= sizeof(complex128) + MOVQ incY+64(FP), INC_Y // INC_Y = incY + SHLQ $4, INC_Y // INC_Y *= sizeof(complex128) + MOVSD $(-1.0), NEG1 + SHUFPD $0, NEG1, NEG1 // { -1, -1 } + MOVQ LEN, TAIL + ANDQ $3, TAIL // TAIL = n % 4 + SHRQ $2, LEN // LEN = floor( n / 4 ) + JZ dot_tail // if n <= 4 { goto dot_tail } + MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining + LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128) + LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128) + +dot_loop: // do { + MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) } + MOVDDUP_XPTR_INCX__X5 + MOVDDUP_XPTR_INCX_2__X7 + MOVDDUP_XPTR_INCx3X__X9 + + MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) } + MOVDDUP_8_XPTR_INCX__X4 + MOVDDUP_8_XPTR_INCX_2__X6 + MOVDDUP_8_XPTR_INCx3X__X8 + + // X_i = { -imag(x[i]), -imag(x[i]) } + MULPD NEG1, X2 + MULPD P_NEG1, X4 + MULPD NEG1, X6 + MULPD P_NEG1, X8 + + // X_j = { imag(y[i]), real(y[i]) } + MOVUPS (Y_PTR), X10 + MOVUPS (Y_PTR)(INC_Y*1), X11 + MOVUPS (Y_PTR)(INC_Y*2), X12 + MOVUPS (Y_PTR)(INCx3_Y*1), X13 + + // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + MULPD X10, X3 + MULPD X11, X5 + MULPD X12, X7 + MULPD X13, X9 + + // X_j = { real(y[i]), imag(y[i]) } + SHUFPD $0x1, X10, X10 + SHUFPD $0x1, X11, X11 + SHUFPD $0x1, X12, X12 + SHUFPD $0x1, X13, X13 + + // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + MULPD X10, X2 + MULPD X11, X4 + MULPD X12, X6 + MULPD X13, X8 + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDSUBPD_X4_X5 + ADDSUBPD_X6_X7 + ADDSUBPD_X8_X9 + + // psum += result[i] + ADDPD X3, SUM + ADDPD X5, P_SUM + ADDPD X7, SUM + ADDPD X9, P_SUM + + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4]) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4]) + + DECQ LEN + JNZ dot_loop // } while --LEN > 0 + ADDPD P_SUM, SUM // sum += psum + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE dot_end + +dot_tail: // do { + MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) } + MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) } + MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) } + MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]) , real(y[i]) } + MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) } + MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDPD X3, SUM // sum += result[i] + ADDQ INC_X, X_PTR // X_PTR += incX + ADDQ INC_Y, Y_PTR // Y_PTR += incY + DECQ TAIL + JNZ dot_tail // } while --TAIL > 0 + +dot_end: + MOVUPS SUM, sum+88(FP) + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/dotcunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotcunitary_amd64.s new file mode 100644 index 0000000000..0ffd0f1289 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotcunitary_amd64.s @@ -0,0 +1,143 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
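+//
+// This file implements DotcUnitary, the conjugating dot product
+// sum = Σ conj(x[i]) * y[i] over contiguous complex128 slices; the
+// conjugation appears below as the multiplication of imag(x[i]) by -1.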
+
+// +build !noasm,!gccgo,!safe
+
+#include "textflag.h"
+
+#define MOVDDUP_XPTR_IDX_8__X3    LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
+#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
+#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
+#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
+
+#define MOVDDUP_XPTR_IIDX_8__X2    LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
+#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
+#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
+#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
+
+#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
+#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
+#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
+#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
+
+#define X_PTR SI
+#define Y_PTR DI
+#define LEN CX
+#define TAIL BX
+#define SUM X0
+#define P_SUM X1
+#define IDX AX
+#define I_IDX DX
+#define NEG1 X15
+#define P_NEG1 X14
+
+// func DotcUnitary(x, y []complex128) (sum complex128)
+TEXT ·DotcUnitary(SB), NOSPLIT, $0
+	MOVQ x_base+0(FP), X_PTR  // X_PTR = &x
+	MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
+	MOVQ x_len+8(FP), LEN     // LEN = min( len(x), len(y) )
+	CMPQ y_len+32(FP), LEN
+	CMOVQLE y_len+32(FP), LEN
+	PXOR SUM, SUM // sum = 0
+	CMPQ LEN, $0  // if LEN == 0 { return }
+	JE dot_end
+	XORPS P_SUM, P_SUM // psum = 0
+	MOVSD $(-1.0), NEG1
+	SHUFPD $0, NEG1, NEG1 // { -1, -1 }
+	XORQ IDX, IDX  // i := 0
+	MOVQ $1, I_IDX // j := 1
+	MOVQ LEN, TAIL
+	ANDQ $3, TAIL // TAIL = LEN % 4
+	SHRQ $2, LEN  // LEN = floor( LEN / 4 )
+	JZ dot_tail   // if LEN == 0 { goto dot_tail }
+
+	MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
+
+dot_loop: // do {
+	MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i]), real(x[i]) }
+	MOVDDUP_16_XPTR_IDX_8__X5
+	MOVDDUP_32_XPTR_IDX_8__X7
+	MOVDDUP_48_XPTR_IDX_8__X9
+
+	MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
+	MOVDDUP_16_XPTR_IIDX_8__X4
+	MOVDDUP_32_XPTR_IIDX_8__X6
+	MOVDDUP_48_XPTR_IIDX_8__X8
+
+	// X_i = { -imag(x[i]), -imag(x[i]) }
+	MULPD NEG1, X2
+	MULPD P_NEG1, X4
+	MULPD NEG1, X6
+	MULPD P_NEG1, X8
+
+	// X_j = { imag(y[i]), real(y[i]) }
+	MOVUPS (Y_PTR)(IDX*8), X10
+	MOVUPS 16(Y_PTR)(IDX*8), X11
+	MOVUPS 32(Y_PTR)(IDX*8), X12
+	MOVUPS 48(Y_PTR)(IDX*8), X13
+
+	// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
+	MULPD X10, X3
+	MULPD X11, X5
+	MULPD X12, X7
+	MULPD X13, X9
+
+	// X_j = { real(y[i]), imag(y[i]) }
+	SHUFPD $0x1, X10, X10
+	SHUFPD $0x1, X11, X11
+	SHUFPD $0x1, X12, X12
+	SHUFPD $0x1, X13, X13
+
+	// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
+	MULPD X10, X2
+	MULPD X11, X4
+	MULPD X12, X6
+	MULPD X13, X8
+
+	// X_(i+1) = {
+	//	imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
+	// }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	// psum += result[i]
+	ADDPD X3, SUM
+	ADDPD X5, P_SUM
+	ADDPD X7, SUM
+	ADDPD X9, P_SUM
+
+	ADDQ $8, IDX   // IDX += 8
+	ADDQ $8, I_IDX // I_IDX += 8
+	DECQ LEN
+	JNZ dot_loop     // } while --LEN > 0
+	ADDPD P_SUM, SUM // sum += psum
+	CMPQ TAIL, $0    // if TAIL == 0 { return }
+	JE dot_end
+
+dot_tail: // do {
+	MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i]) , real(x[i]) }
+ MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]) , imag(x[i]) } + MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) } + MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]) , real(y[i]) } + MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) } + MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDPD X3, SUM // SUM += result[i] + ADDQ $2, IDX // IDX += 2 + ADDQ $2, I_IDX // I_IDX += 2 + DECQ TAIL + JNZ dot_tail // } while --TAIL > 0 + +dot_end: + MOVUPS SUM, sum+48(FP) + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/dotuinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotuinc_amd64.s new file mode 100644 index 0000000000..74fe5c3ba5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotuinc_amd64.s @@ -0,0 +1,141 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3 +#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5 +#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7 +#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9 + +#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2 +#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4 +#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6 +#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8 + +#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3 +#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5 +#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7 +#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9 + +#define X_PTR SI +#define Y_PTR DI +#define LEN CX +#define TAIL BX +#define SUM X0 +#define P_SUM X1 +#define INC_X R8 +#define INCx3_X R9 +#define INC_Y R10 +#define INCx3_Y R11 + +// func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) +TEXT ·DotuInc(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y + MOVQ n+48(FP), LEN // LEN = n + PXOR SUM, SUM // sum = 0 + CMPQ LEN, $0 // if LEN == 0 { return } + JE dot_end + MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128) + SHLQ $4, INC_X + MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128) + SHLQ $4, INC_Y + LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix]) + LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy]) + MOVQ incX+56(FP), INC_X // INC_X = incX + SHLQ $4, INC_X // INC_X *= sizeof(complex128) + MOVQ incY+64(FP), INC_Y // INC_Y = incY + SHLQ $4, INC_Y // INC_Y *= sizeof(complex128) + MOVQ LEN, TAIL + ANDQ $3, TAIL // LEN = LEN % 4 + SHRQ $2, LEN // LEN = floor( LEN / 4 ) + JZ dot_tail // if LEN <= 4 { goto dot_tail } + PXOR P_SUM, P_SUM // psum = 0 + LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128) + LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128) + +dot_loop: // do { + MOVDDUP_XPTR__X3 // X_(i+1) = { 
real(x[i], real(x[i]) } + MOVDDUP_XPTR_INCX__X5 + MOVDDUP_XPTR_INCX_2__X7 + MOVDDUP_XPTR_INCx3X__X9 + + MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) } + MOVDDUP_8_XPTR_INCX__X4 + MOVDDUP_8_XPTR_INCX_2__X6 + MOVDDUP_8_XPTR_INCx3X__X8 + + // X_j = { imag(y[i]), real(y[i]) } + MOVUPS (Y_PTR), X10 + MOVUPS (Y_PTR)(INC_Y*1), X11 + MOVUPS (Y_PTR)(INC_Y*2), X12 + MOVUPS (Y_PTR)(INCx3_Y*1), X13 + + // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + MULPD X10, X3 + MULPD X11, X5 + MULPD X12, X7 + MULPD X13, X9 + + // X_j = { real(y[i]), imag(y[i]) } + SHUFPD $0x1, X10, X10 + SHUFPD $0x1, X11, X11 + SHUFPD $0x1, X12, X12 + SHUFPD $0x1, X13, X13 + + // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + MULPD X10, X2 + MULPD X11, X4 + MULPD X12, X6 + MULPD X13, X8 + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDSUBPD_X4_X5 + ADDSUBPD_X6_X7 + ADDSUBPD_X8_X9 + + // psum += result[i] + ADDPD X3, SUM + ADDPD X5, P_SUM + ADDPD X7, SUM + ADDPD X9, P_SUM + + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4]) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4]) + + DECQ LEN + JNZ dot_loop // } while --BX > 0 + ADDPD P_SUM, SUM // sum += psum + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE dot_end + +dot_tail: // do { + MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) } + MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) } + MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]) , real(y[i]) } + MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) } + SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) } + MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDPD X3, SUM // sum += result[i] + ADDQ INC_X, X_PTR // X_PTR += incX + ADDQ INC_Y, Y_PTR // Y_PTR += incY + DECQ TAIL // --TAIL + JNZ dot_tail // } while TAIL > 0 + +dot_end: + MOVUPS SUM, sum+88(FP) + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/dotuunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotuunitary_amd64.s new file mode 100644 index 0000000000..8df019881b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotuunitary_amd64.s @@ -0,0 +1,130 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
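
DotuUnitary below is the unconjugated variant of the kernel above: the NEG1 multiplies are dropped, so it reduces sum += x[i] * y[i] directly. A plain-Go statement of that reduction (dotuRef is an illustrative name, not part of this patch):

package sketch

// dotuRef states the unconjugated reduction: sum += x[i]*y[i], e.g.
// (1+2i)*(3-1i) = 5+5i.
func dotuRef(x, y []complex128) complex128 {
	var sum complex128
	for i, v := range x {
		sum += v * y[i]
	}
	return sum
}
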
+
+// +build !noasm,!gccgo,!safe
+
+#include "textflag.h"
+
+#define MOVDDUP_XPTR_IDX_8__X3    LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
+#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
+#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
+#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
+
+#define MOVDDUP_XPTR_IIDX_8__X2    LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
+#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
+#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
+#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
+
+#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
+#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
+#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
+#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
+
+#define X_PTR SI
+#define Y_PTR DI
+#define LEN CX
+#define TAIL BX
+#define SUM X0
+#define P_SUM X1
+#define IDX AX
+#define I_IDX DX
+
+// func DotuUnitary(x, y []complex128) (sum complex128)
+TEXT ·DotuUnitary(SB), NOSPLIT, $0
+	MOVQ x_base+0(FP), X_PTR  // X_PTR = &x
+	MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
+	MOVQ x_len+8(FP), LEN     // LEN = min( len(x), len(y) )
+	CMPQ y_len+32(FP), LEN
+	CMOVQLE y_len+32(FP), LEN
+	PXOR SUM, SUM // SUM = 0
+	CMPQ LEN, $0  // if LEN == 0 { return }
+	JE dot_end
+	PXOR P_SUM, P_SUM // P_SUM = 0
+	XORQ IDX, IDX     // IDX = 0
+	MOVQ $1, DX       // j = 1
+	MOVQ LEN, TAIL
+	ANDQ $3, TAIL // TAIL = LEN % 4
+	SHRQ $2, LEN  // LEN = floor( LEN / 4 )
+	JZ dot_tail   // if LEN == 0 { goto dot_tail }
+
+dot_loop: // do {
+	MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i]), real(x[i]) }
+	MOVDDUP_16_XPTR_IDX_8__X5
+	MOVDDUP_32_XPTR_IDX_8__X7
+	MOVDDUP_48_XPTR_IDX_8__X9
+
+	MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
+	MOVDDUP_16_XPTR_IIDX_8__X4
+	MOVDDUP_32_XPTR_IIDX_8__X6
+	MOVDDUP_48_XPTR_IIDX_8__X8
+
+	// X_j = { imag(y[i]), real(y[i]) }
+	MOVUPS (Y_PTR)(IDX*8), X10
+	MOVUPS 16(Y_PTR)(IDX*8), X11
+	MOVUPS 32(Y_PTR)(IDX*8), X12
+	MOVUPS 48(Y_PTR)(IDX*8), X13
+
+	// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
+	MULPD X10, X3
+	MULPD X11, X5
+	MULPD X12, X7
+	MULPD X13, X9
+
+	// X_j = { real(y[i]), imag(y[i]) }
+	SHUFPD $0x1, X10, X10
+	SHUFPD $0x1, X11, X11
+	SHUFPD $0x1, X12, X12
+	SHUFPD $0x1, X13, X13
+
+	// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
+	MULPD X10, X2
+	MULPD X11, X4
+	MULPD X12, X6
+	MULPD X13, X8
+
+	// X_(i+1) = {
+	//	imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
+	// }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	// psum += result[i]
+	ADDPD X3, SUM
+	ADDPD X5, P_SUM
+	ADDPD X7, SUM
+	ADDPD X9, P_SUM
+
+	ADDQ $8, IDX   // IDX += 8
+	ADDQ $8, I_IDX // I_IDX += 8
+	DECQ LEN
+	JNZ dot_loop     // } while --LEN > 0
+	ADDPD P_SUM, SUM // SUM += P_SUM
+	CMPQ TAIL, $0    // if TAIL == 0 { return }
+	JE dot_end
+
+dot_tail: // do {
+	MOVDDUP_XPTR_IDX_8__X3  // X_(i+1) = { real(x[i]) , real(x[i]) }
+	MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]) , imag(x[i]) }
+	MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]) , real(y[i]) }
+	MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
+	SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
+	MULPD X10,
X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + + // X_(i+1) = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDPD X3, SUM // psum += result[i] + ADDQ $2, IDX // IDX += 2 + ADDQ $2, I_IDX // I_IDX += 2 + DECQ TAIL // --TAIL + JNZ dot_tail // } while TAIL > 0 + +dot_end: + MOVUPS SUM, sum+48(FP) + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/dscalinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/dscalinc_amd64.s new file mode 100644 index 0000000000..77a28ccead --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dscalinc_amd64.s @@ -0,0 +1,69 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define SRC SI +#define DST SI +#define LEN CX +#define TAIL BX +#define INC R9 +#define INC3 R10 +#define ALPHA X0 +#define ALPHA_2 X1 + +#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0 + +// func DscalInc(alpha float64, x []complex128, n, inc uintptr) +TEXT ·DscalInc(SB), NOSPLIT, $0 + MOVQ x_base+8(FP), SRC // SRC = &x + MOVQ n+32(FP), LEN // LEN = n + CMPQ LEN, $0 // if LEN == 0 { return } + JE dscal_end + + MOVDDUP_ALPHA // ALPHA = alpha + MOVQ inc+40(FP), INC // INC = inc + SHLQ $4, INC // INC = INC * sizeof(complex128) + LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC + MOVUPS ALPHA, ALPHA_2 // Copy ALPHA and ALPHA_2 for pipelining + MOVQ LEN, TAIL // TAIL = LEN + SHRQ $2, LEN // LEN = floor( n / 4 ) + JZ dscal_tail // if LEN == 0 { goto dscal_tail } + +dscal_loop: // do { + MOVUPS (SRC), X2 // X_i = x[i] + MOVUPS (SRC)(INC*1), X3 + MOVUPS (SRC)(INC*2), X4 + MOVUPS (SRC)(INC3*1), X5 + + MULPD ALPHA, X2 // X_i *= ALPHA + MULPD ALPHA_2, X3 + MULPD ALPHA, X4 + MULPD ALPHA_2, X5 + + MOVUPS X2, (DST) // x[i] = X_i + MOVUPS X3, (DST)(INC*1) + MOVUPS X4, (DST)(INC*2) + MOVUPS X5, (DST)(INC3*1) + + LEAQ (SRC)(INC*4), SRC // SRC += INC*4 + DECQ LEN + JNZ dscal_loop // } while --LEN > 0 + +dscal_tail: + ANDQ $3, TAIL // TAIL = TAIL % 4 + JE dscal_end // if TAIL == 0 { return } + +dscal_tail_loop: // do { + MOVUPS (SRC), X2 // X_i = x[i] + MULPD ALPHA, X2 // X_i *= ALPHA + MOVUPS X2, (DST) // x[i] = X_i + ADDQ INC, SRC // SRC += INC + DECQ TAIL + JNZ dscal_tail_loop // } while --TAIL > 0 + +dscal_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/dscalunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/dscalunitary_amd64.s new file mode 100644 index 0000000000..9fa91e4624 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dscalunitary_amd64.s @@ -0,0 +1,66 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
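
DscalUnitary below scales a complex128 vector by a real alpha: MOVDDUP broadcasts alpha into both 64-bit lanes so a single MULPD scales the real and imaginary parts together. A scalar Go sketch of the same operation (dscalRef is an illustrative name, not part of this patch):

package sketch

// dscalRef mirrors DscalUnitary: a real alpha scales both components
// of each element, which the kernel does with one broadcast MULPD.
func dscalRef(alpha float64, x []complex128) {
	for i, v := range x {
		x[i] = complex(real(v)*alpha, imag(v)*alpha)
	}
}
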
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define SRC SI +#define DST SI +#define LEN CX +#define IDX AX +#define TAIL BX +#define ALPHA X0 +#define ALPHA_2 X1 + +#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0 + +// func DscalUnitary(alpha float64, x []complex128) +TEXT ·DscalUnitary(SB), NOSPLIT, $0 + MOVQ x_base+8(FP), SRC // SRC = &x + MOVQ x_len+16(FP), LEN // LEN = len(x) + CMPQ LEN, $0 // if LEN == 0 { return } + JE dscal_end + + MOVDDUP_ALPHA // ALPHA = alpha + XORQ IDX, IDX // IDX = 0 + MOVUPS ALPHA, ALPHA_2 // Copy ALPHA to ALPHA_2 for pipelining + MOVQ LEN, TAIL // TAIL = LEN + SHRQ $2, LEN // LEN = floor( n / 4 ) + JZ dscal_tail // if LEN == 0 { goto dscal_tail } + +dscal_loop: // do { + MOVUPS (SRC)(IDX*8), X2 // X_i = x[i] + MOVUPS 16(SRC)(IDX*8), X3 + MOVUPS 32(SRC)(IDX*8), X4 + MOVUPS 48(SRC)(IDX*8), X5 + + MULPD ALPHA, X2 // X_i *= ALPHA + MULPD ALPHA_2, X3 + MULPD ALPHA, X4 + MULPD ALPHA_2, X5 + + MOVUPS X2, (DST)(IDX*8) // x[i] = X_i + MOVUPS X3, 16(DST)(IDX*8) + MOVUPS X4, 32(DST)(IDX*8) + MOVUPS X5, 48(DST)(IDX*8) + + ADDQ $8, IDX // IDX += 8 + DECQ LEN + JNZ dscal_loop // } while --LEN > 0 + +dscal_tail: + ANDQ $3, TAIL // TAIL = TAIL % 4 + JZ dscal_end // if TAIL == 0 { return } + +dscal_tail_loop: // do { + MOVUPS (SRC)(IDX*8), X2 // X_i = x[i] + MULPD ALPHA, X2 // X_i *= ALPHA + MOVUPS X2, (DST)(IDX*8) // x[i] = X_i + ADDQ $2, IDX // IDX += 2 + DECQ TAIL + JNZ dscal_tail_loop // } while --TAIL > 0 + +dscal_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/scal.go b/vendor/gonum.org/v1/gonum/internal/asm/c128/scal.go new file mode 100644 index 0000000000..27c3581752 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/scal.go @@ -0,0 +1,33 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package c128 + +// ScalUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha * v +// } +func ScalUnitaryTo(dst []complex128, alpha complex128, x []complex128) { + for i, v := range x { + dst[i] = alpha * v + } +} + +// ScalIncTo is +// +// var idst, ix uintptr +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha * x[ix] +// ix += incX +// idst += incDst +// } +func ScalIncTo(dst []complex128, incDst uintptr, alpha complex128, x []complex128, n, incX uintptr) { + var idst, ix uintptr + for i := 0; i < int(n); i++ { + dst[idst] = alpha * x[ix] + ix += incX + idst += incDst + } +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/scalUnitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/scalUnitary_amd64.s new file mode 100644 index 0000000000..b76037fdd0 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/scalUnitary_amd64.s @@ -0,0 +1,116 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
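
ScalUnitary below multiplies each element by a complex alpha. The kernel keeps alpha in two lane orders (ALPHA and the swapped ALPHA_C), splits each x[i] into duplicated-real and duplicated-imaginary halves, and lets ADDSUBPD recombine the four partial products. The equivalent scalar arithmetic, as a hedged Go sketch (scalLanes is an illustrative name):

package sketch

// scalLanes spells out the complex multiply behind ScalUnitary: with
// alpha = ar+ai*i, each element becomes (ar*xr-ai*xi) + (ai*xr+ar*xi)*i,
// exactly the two lanes the ADDSUBPD step produces.
func scalLanes(alpha complex128, x []complex128) {
	ar, ai := real(alpha), imag(alpha)
	for i, v := range x {
		xr, xi := real(v), imag(v)
		x[i] = complex(ar*xr-ai*xi, ai*xr+ar*xi)
	}
}
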
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define SRC SI +#define DST SI +#define LEN CX +#define IDX AX +#define TAIL BX +#define ALPHA X0 +#define ALPHA_C X1 +#define ALPHA2 X10 +#define ALPHA_C2 X11 + +#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3 +#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5 +#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7 +#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9 + +#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3 +#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5 +#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7 +#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9 + +// func ScalUnitary(alpha complex128, x []complex128) +TEXT ·ScalUnitary(SB), NOSPLIT, $0 + MOVQ x_base+16(FP), SRC // SRC = &x + MOVQ x_len+24(FP), LEN // LEN = len(x) + CMPQ LEN, $0 // if LEN == 0 { return } + JE scal_end + + MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) } + MOVAPS ALPHA, ALPHA_C + SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) } + + XORQ IDX, IDX // IDX = 0 + MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining + MOVAPS ALPHA_C, ALPHA_C2 + MOVQ LEN, TAIL + SHRQ $2, LEN // LEN = floor( n / 4 ) + JZ scal_tail // if BX == 0 { goto scal_tail } + +scal_loop: // do { + MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) } + MOVUPS 16(SRC)(IDX*8), X4 + MOVUPS 32(SRC)(IDX*8), X6 + MOVUPS 48(SRC)(IDX*8), X8 + + // X_(i+1) = { real(x[i], real(x[i]) } + MOVDDUP_X2_X3 + MOVDDUP_X4_X5 + MOVDDUP_X6_X7 + MOVDDUP_X8_X9 + + // X_i = { imag(x[i]), imag(x[i]) } + SHUFPD $0x3, X2, X2 + SHUFPD $0x3, X4, X4 + SHUFPD $0x3, X6, X6 + SHUFPD $0x3, X8, X8 + + // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) } + // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) } + MULPD ALPHA_C, X2 + MULPD ALPHA, X3 + MULPD ALPHA_C2, X4 + MULPD ALPHA2, X5 + MULPD ALPHA_C, X6 + MULPD ALPHA, X7 + MULPD ALPHA_C2, X8 + MULPD ALPHA2, X9 + + // X_(i+1) = { + // imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]), + // real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDSUBPD_X4_X5 + ADDSUBPD_X6_X7 + ADDSUBPD_X8_X9 + + MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1) + MOVUPS X5, 16(DST)(IDX*8) + MOVUPS X7, 32(DST)(IDX*8) + MOVUPS X9, 48(DST)(IDX*8) + ADDQ $8, IDX // IDX += 8 + DECQ LEN + JNZ scal_loop // } while --LEN > 0 + +scal_tail: + ANDQ $3, TAIL // TAIL = TAIL % 4 + JZ scal_end // if TAIL == 0 { return } + +scal_tail_loop: // do { + MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) } + MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) } + SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) } + MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) } + MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) } + + // X_(i+1) = { + // imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]), + // real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + + MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1) + ADDQ $2, IDX // IDX += 2 + DECQ TAIL + JNZ scal_tail_loop // } while --LEN > 0 + +scal_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/scalinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c128/scalinc_amd64.s new file mode 100644 index 0000000000..6e0e51b658 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/scalinc_amd64.s @@ -0,0 +1,121 @@ 
+// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define SRC SI +#define DST SI +#define LEN CX +#define TAIL BX +#define INC R9 +#define INC3 R10 +#define ALPHA X0 +#define ALPHA_C X1 +#define ALPHA2 X10 +#define ALPHA_C2 X11 + +#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3 +#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5 +#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7 +#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9 + +#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3 +#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5 +#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7 +#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9 + +// func ScalInc(alpha complex128, x []complex128, n, inc uintptr) +TEXT ·ScalInc(SB), NOSPLIT, $0 + MOVQ x_base+16(FP), SRC // SRC = &x + MOVQ n+40(FP), LEN // LEN = len(x) + CMPQ LEN, $0 + JE scal_end // if LEN == 0 { return } + + MOVQ inc+48(FP), INC // INC = inc + SHLQ $4, INC // INC = INC * sizeof(complex128) + LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC + + MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) } + MOVAPS ALPHA, ALPHA_C + SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) } + + MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining + MOVAPS ALPHA_C, ALPHA_C2 + MOVQ LEN, TAIL + SHRQ $2, LEN // LEN = floor( n / 4 ) + JZ scal_tail // if BX == 0 { goto scal_tail } + +scal_loop: // do { + MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) } + MOVUPS (SRC)(INC*1), X4 + MOVUPS (SRC)(INC*2), X6 + MOVUPS (SRC)(INC3*1), X8 + + // X_(i+1) = { real(x[i], real(x[i]) } + MOVDDUP_X2_X3 + MOVDDUP_X4_X5 + MOVDDUP_X6_X7 + MOVDDUP_X8_X9 + + // X_i = { imag(x[i]), imag(x[i]) } + SHUFPD $0x3, X2, X2 + SHUFPD $0x3, X4, X4 + SHUFPD $0x3, X6, X6 + SHUFPD $0x3, X8, X8 + + // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) } + // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) } + MULPD ALPHA_C, X2 + MULPD ALPHA, X3 + MULPD ALPHA_C2, X4 + MULPD ALPHA2, X5 + MULPD ALPHA_C, X6 + MULPD ALPHA, X7 + MULPD ALPHA_C2, X8 + MULPD ALPHA2, X9 + + // X_(i+1) = { + // imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]), + // real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + ADDSUBPD_X4_X5 + ADDSUBPD_X6_X7 + ADDSUBPD_X8_X9 + + MOVUPS X3, (DST) // x[i] = X_(i+1) + MOVUPS X5, (DST)(INC*1) + MOVUPS X7, (DST)(INC*2) + MOVUPS X9, (DST)(INC3*1) + + LEAQ (SRC)(INC*4), SRC // SRC = &(SRC[inc*4]) + DECQ LEN + JNZ scal_loop // } while --BX > 0 + +scal_tail: + ANDQ $3, TAIL // TAIL = TAIL % 4 + JE scal_end // if TAIL == 0 { return } + +scal_tail_loop: // do { + MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) } + MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) } + SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) } + MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) } + MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) } + + // X_(i+1) = { + // imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]), + // real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i]) + // } + ADDSUBPD_X2_X3 + + MOVUPS X3, (DST) // x[i] = X_i + ADDQ INC, SRC // SRC = &(SRC[incX]) + DECQ TAIL + JNZ scal_tail_loop // } while --TAIL > 0 + +scal_end: + RET 
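
All of these kernels share one control-flow shape: an unrolled main loop over floor(n/4) groups that alternates between two accumulators (SUM and P_SUM) to hide instruction latency, followed by a one-element tail loop for the n%4 remainder. Roughly, in Go (unrolledSum is an illustrative stand-in using float64, not part of this patch):

package sketch

// unrolledSum shows, in scalar Go, the loop shape the assembly uses: a
// four-wide main loop feeding two accumulators, then a scalar tail.
func unrolledSum(x []float64) float64 {
	var s0, s1 float64 // like SUM and P_SUM in the assembly
	n := len(x) / 4 * 4
	for i := 0; i < n; i += 4 {
		s0 += x[i] + x[i+2] // the asm alternates target registers the same way
		s1 += x[i+1] + x[i+3]
	}
	for _, v := range x[n:] { // the *_tail loop: at most three elements
		s0 += v
	}
	return s0 + s1 // the kernels fold P_SUM into SUM when the main loop exits
}

Splitting the reduction across two registers lets independent adds overlap in the pipeline; the single fold at the end restores the exact scalar result for addition-commutative inputs.
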
diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs.go b/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs.go new file mode 100644 index 0000000000..9c3a8fb83d --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs.go @@ -0,0 +1,180 @@ +// Copyright ©2020 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package c128 + +import ( + "math" + "math/cmplx" +) + +// Add is +// +// for i, v := range s { +// dst[i] += v +// } +func Add(dst, s []complex128) { + for i, v := range s { + dst[i] += v + } +} + +// AddConst is +// +// for i := range x { +// x[i] += alpha +// } +func AddConst(alpha complex128, x []complex128) { + for i := range x { + x[i] += alpha + } +} + +// CumSum is +// +// if len(s) == 0 { +// return dst +// } +// dst[0] = s[0] +// for i, v := range s[1:] { +// dst[i+1] = dst[i] + v +// } +// return dst +func CumSum(dst, s []complex128) []complex128 { + if len(s) == 0 { + return dst + } + dst[0] = s[0] + for i, v := range s[1:] { + dst[i+1] = dst[i] + v + } + return dst +} + +// CumProd is +// +// if len(s) == 0 { +// return dst +// } +// dst[0] = s[0] +// for i, v := range s[1:] { +// dst[i+1] = dst[i] * v +// } +// return dst +func CumProd(dst, s []complex128) []complex128 { + if len(s) == 0 { + return dst + } + dst[0] = s[0] + for i, v := range s[1:] { + dst[i+1] = dst[i] * v + } + return dst +} + +// Div is +// +// for i, v := range s { +// dst[i] /= v +// } +func Div(dst, s []complex128) { + for i, v := range s { + dst[i] /= v + } +} + +// DivTo is +// +// for i, v := range s { +// dst[i] = v / t[i] +// } +// return dst +func DivTo(dst, s, t []complex128) []complex128 { + for i, v := range s { + dst[i] = v / t[i] + } + return dst +} + +// DotUnitary is +// +// for i, v := range x { +// sum += cmplx.Conj(v) * y[i] +// } +// return sum +func DotUnitary(x, y []complex128) (sum complex128) { + for i, v := range x { + sum += cmplx.Conj(v) * y[i] + } + return sum +} + +// L2DistanceUnitary returns the L2-norm of x-y. +func L2DistanceUnitary(x, y []complex128) (norm float64) { + var scale float64 + sumSquares := 1.0 + for i, v := range x { + v -= y[i] + if v == 0 { + continue + } + absxi := cmplx.Abs(v) + if math.IsNaN(absxi) { + return math.NaN() + } + if scale < absxi { + s := scale / absxi + sumSquares = 1 + sumSquares*s*s + scale = absxi + } else { + s := absxi / scale + sumSquares += s * s + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(sumSquares) +} + +// L2NormUnitary returns the L2-norm of x. 
+func L2NormUnitary(x []complex128) (norm float64) { + var scale float64 + sumSquares := 1.0 + for _, v := range x { + if v == 0 { + continue + } + absxi := cmplx.Abs(v) + if math.IsNaN(absxi) { + return math.NaN() + } + if scale < absxi { + s := scale / absxi + sumSquares = 1 + sumSquares*s*s + scale = absxi + } else { + s := absxi / scale + sumSquares += s * s + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(sumSquares) +} + +// Sum is +// +// var sum complex128 +// for i := range x { +// sum += x[i] +// } +func Sum(x []complex128) complex128 { + var sum complex128 + for _, v := range x { + sum += v + } + return sum +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_amd64.go b/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_amd64.go new file mode 100644 index 0000000000..c0e26a2f1e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_amd64.go @@ -0,0 +1,109 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !noasm && !gccgo && !safe +// +build !noasm,!gccgo,!safe + +package c128 + +// AxpyUnitary is +// +// for i, v := range x { +// y[i] += alpha * v +// } +func AxpyUnitary(alpha complex128, x, y []complex128) + +// AxpyUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha*v + y[i] +// } +func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128) + +// AxpyInc is +// +// for i := 0; i < int(n); i++ { +// y[iy] += alpha * x[ix] +// ix += incX +// iy += incY +// } +func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) + +// AxpyIncTo is +// +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha*x[ix] + y[iy] +// ix += incX +// iy += incY +// idst += incDst +// } +func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) + +// DscalUnitary is +// +// for i, v := range x { +// x[i] = complex(real(v)*alpha, imag(v)*alpha) +// } +func DscalUnitary(alpha float64, x []complex128) + +// DscalInc is +// +// var ix uintptr +// for i := 0; i < int(n); i++ { +// x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha) +// ix += inc +// } +func DscalInc(alpha float64, x []complex128, n, inc uintptr) + +// ScalInc is +// +// var ix uintptr +// for i := 0; i < int(n); i++ { +// x[ix] *= alpha +// ix += incX +// } +func ScalInc(alpha complex128, x []complex128, n, inc uintptr) + +// ScalUnitary is +// +// for i := range x { +// x[i] *= alpha +// } +func ScalUnitary(alpha complex128, x []complex128) + +// DotcUnitary is +// +// for i, v := range x { +// sum += y[i] * cmplx.Conj(v) +// } +// return sum +func DotcUnitary(x, y []complex128) (sum complex128) + +// DotcInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * cmplx.Conj(x[ix]) +// ix += incX +// iy += incY +// } +// return sum +func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) + +// DotuUnitary is +// +// for i, v := range x { +// sum += y[i] * v +// } +// return sum +func DotuUnitary(x, y []complex128) (sum complex128) + +// DotuInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * x[ix] +// ix += incX +// iy += incY +// } +// return sum +func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_noasm.go b/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_noasm.go new file mode 100644 index 
0000000000..21dfc4a8e1 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_noasm.go @@ -0,0 +1,176 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !amd64 || noasm || gccgo || safe +// +build !amd64 noasm gccgo safe + +package c128 + +import "math/cmplx" + +// AxpyUnitary is +// +// for i, v := range x { +// y[i] += alpha * v +// } +func AxpyUnitary(alpha complex128, x, y []complex128) { + for i, v := range x { + y[i] += alpha * v + } +} + +// AxpyUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha*v + y[i] +// } +func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128) { + for i, v := range x { + dst[i] = alpha*v + y[i] + } +} + +// AxpyInc is +// +// for i := 0; i < int(n); i++ { +// y[iy] += alpha * x[ix] +// ix += incX +// iy += incY +// } +func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) { + for i := 0; i < int(n); i++ { + y[iy] += alpha * x[ix] + ix += incX + iy += incY + } +} + +// AxpyIncTo is +// +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha*x[ix] + y[iy] +// ix += incX +// iy += incY +// idst += incDst +// } +func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) { + for i := 0; i < int(n); i++ { + dst[idst] = alpha*x[ix] + y[iy] + ix += incX + iy += incY + idst += incDst + } +} + +// DscalUnitary is +// +// for i, v := range x { +// x[i] = complex(real(v)*alpha, imag(v)*alpha) +// } +func DscalUnitary(alpha float64, x []complex128) { + for i, v := range x { + x[i] = complex(real(v)*alpha, imag(v)*alpha) + } +} + +// DscalInc is +// +// var ix uintptr +// for i := 0; i < int(n); i++ { +// x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha) +// ix += inc +// } +func DscalInc(alpha float64, x []complex128, n, inc uintptr) { + var ix uintptr + for i := 0; i < int(n); i++ { + x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha) + ix += inc + } +} + +// ScalInc is +// +// var ix uintptr +// for i := 0; i < int(n); i++ { +// x[ix] *= alpha +// ix += incX +// } +func ScalInc(alpha complex128, x []complex128, n, inc uintptr) { + var ix uintptr + for i := 0; i < int(n); i++ { + x[ix] *= alpha + ix += inc + } +} + +// ScalUnitary is +// +// for i := range x { +// x[i] *= alpha +// } +func ScalUnitary(alpha complex128, x []complex128) { + for i := range x { + x[i] *= alpha + } +} + +// DotcUnitary is +// +// for i, v := range x { +// sum += y[i] * cmplx.Conj(v) +// } +// return sum +func DotcUnitary(x, y []complex128) (sum complex128) { + for i, v := range x { + sum += y[i] * cmplx.Conj(v) + } + return sum +} + +// DotcInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * cmplx.Conj(x[ix]) +// ix += incX +// iy += incY +// } +// return sum +func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) { + for i := 0; i < int(n); i++ { + sum += y[iy] * cmplx.Conj(x[ix]) + ix += incX + iy += incY + } + return sum +} + +// DotuUnitary is +// +// for i, v := range x { +// sum += y[i] * v +// } +// return sum +func DotuUnitary(x, y []complex128) (sum complex128) { + for i, v := range x { + sum += y[i] * v + } + return sum +} + +// DotuInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * x[ix] +// ix += incX +// iy += incY +// } +// return sum +func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) { + for i := 0; i < int(n); i++ { + sum += y[iy] 
* x[ix] + ix += incX + iy += incY + } + return sum +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyinc_amd64.s new file mode 100644 index 0000000000..4d2c5e9ad5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyinc_amd64.s @@ -0,0 +1,151 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// MOVSHDUP X3, X2 +#define MOVSHDUP_X3_X2 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xD3 +// MOVSLDUP X3, X3 +#define MOVSLDUP_X3_X3 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xDB +// ADDSUBPS X2, X3 +#define ADDSUBPS_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA + +// MOVSHDUP X5, X4 +#define MOVSHDUP_X5_X4 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xE5 +// MOVSLDUP X5, X5 +#define MOVSLDUP_X5_X5 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xED +// ADDSUBPS X4, X5 +#define ADDSUBPS_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC + +// MOVSHDUP X7, X6 +#define MOVSHDUP_X7_X6 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xF7 +// MOVSLDUP X7, X7 +#define MOVSLDUP_X7_X7 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xFF +// ADDSUBPS X6, X7 +#define ADDSUBPS_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE + +// MOVSHDUP X9, X8 +#define MOVSHDUP_X9_X8 BYTE $0xF3; BYTE $0x45; BYTE $0x0F; BYTE $0x16; BYTE $0xC1 +// MOVSLDUP X9, X9 +#define MOVSLDUP_X9_X9 BYTE $0xF3; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC9 +// ADDSUBPS X8, X9 +#define ADDSUBPS_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8 + +// func AxpyInc(alpha complex64, x, y []complex64, n, incX, incY, ix, iy uintptr) +TEXT ·AxpyInc(SB), NOSPLIT, $0 + MOVQ x_base+8(FP), SI // SI = &x + MOVQ y_base+32(FP), DI // DI = &y + MOVQ n+56(FP), CX // CX = n + CMPQ CX, $0 // if n==0 { return } + JE axpyi_end + MOVQ ix+80(FP), R8 // R8 = ix + MOVQ iy+88(FP), R9 // R9 = iy + LEAQ (SI)(R8*8), SI // SI = &(x[ix]) + LEAQ (DI)(R9*8), DI // DI = &(y[iy]) + MOVQ DI, DX // DX = DI // Read/Write pointers + MOVQ incX+64(FP), R8 // R8 = incX + SHLQ $3, R8 // R8 *= sizeof(complex64) + MOVQ incY+72(FP), R9 // R9 = incY + SHLQ $3, R9 // R9 *= sizeof(complex64) + MOVSD alpha+0(FP), X0 // X0 = { 0, 0, imag(a), real(a) } + MOVAPS X0, X1 + SHUFPS $0x11, X1, X1 // X1 = { 0, 0, real(a), imag(a) } + MOVAPS X0, X10 // Copy X0 and X1 for pipelining + MOVAPS X1, X11 + MOVQ CX, BX + ANDQ $3, CX // CX = n % 4 + SHRQ $2, BX // BX = floor( n / 4 ) + JZ axpyi_tail // if BX == 0 { goto axpyi_tail } + +axpyi_loop: // do { + MOVSD (SI), X3 // X_i = { imag(x[i+1]), real(x[i+1]) } + MOVSD (SI)(R8*1), X5 + LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2]) + MOVSD (SI), X7 + MOVSD (SI)(R8*1), X9 + + // X_(i-1) = { imag(x[i]), imag(x[i]) } + MOVSHDUP_X3_X2 + MOVSHDUP_X5_X4 + MOVSHDUP_X7_X6 + MOVSHDUP_X9_X8 + + // X_i = { real(x[i]), real(x[i]) } + MOVSLDUP_X3_X3 + MOVSLDUP_X5_X5 + MOVSLDUP_X7_X7 + MOVSLDUP_X9_X9 + + // X_(i-1) = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + // X_i = { imag(a) * real(x[i]), real(a) * real(x[i]) } + MULPS X1, X2 + MULPS X0, X3 + MULPS X11, X4 + MULPS X10, X5 + MULPS X1, X6 + MULPS X0, X7 + MULPS X11, X8 + MULPS X10, X9 + + // X_i = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]), + // } + ADDSUBPS_X2_X3 + ADDSUBPS_X4_X5 + ADDSUBPS_X6_X7 + ADDSUBPS_X8_X9 + + // X_i = { imag(result[i]) + imag(y[i]), 
real(result[i]) + real(y[i]) }
+	MOVSD (DX), X2
+	MOVSD (DX)(R9*1), X4
+	LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
+	MOVSD (DX), X6
+	MOVSD (DX)(R9*1), X8
+	ADDPS X2, X3
+	ADDPS X4, X5
+	ADDPS X6, X7
+	ADDPS X8, X9
+
+	MOVSD X3, (DI) // y[i] = X_i
+	MOVSD X5, (DI)(R9*1)
+	LEAQ (DI)(R9*2), DI // DI = &(DI[incDst])
+	MOVSD X7, (DI)
+	MOVSD X9, (DI)(R9*1)
+	LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
+	LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
+	LEAQ (DI)(R9*2), DI // DI = &(DI[incDst])
+	DECQ BX
+	JNZ axpyi_loop // } while --BX > 0
+	CMPQ CX, $0    // if CX == 0 { return }
+	JE axpyi_end
+
+axpyi_tail: // do {
+	MOVSD (SI), X3 // X_i = { imag(x[i]), real(x[i]) }
+	MOVSHDUP_X3_X2 // X_(i-1) = { imag(x[i]), imag(x[i]) }
+	MOVSLDUP_X3_X3 // X_i = { real(x[i]), real(x[i]) }
+
+	// X_i = { imag(a) * real(x[i]), real(a) * real(x[i]) }
+	// X_(i-1) = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
+	MULPS X1, X2
+	MULPS X0, X3
+
+	// X_i = {
+	//	imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]),
+	// }
+	ADDSUBPS_X2_X3 // (ai*x1r+ar*x1i, ar*x1r-ai*x1i)
+
+	// X_i = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
+	MOVSD (DI), X4
+	ADDPS X4, X3
+	MOVSD X3, (DI) // y[i] = X_i
+	ADDQ R8, SI    // SI += incX
+	ADDQ R9, DI    // DI += incY
+	LOOP axpyi_tail // } while --CX > 0
+
+axpyi_end:
+	RET
diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyincto_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyincto_amd64.s
new file mode 100644
index 0000000000..1519f2d9b3
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyincto_amd64.s
@@ -0,0 +1,156 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
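
AxpyIncTo below is the strided, three-slice form: dst[idst] = alpha*x[ix] + y[iy], with each slice advancing by its own increment. Its plain-Go meaning, mirroring the documented stub signatures (axpyIncToRef is an illustrative name, not part of this patch):

package sketch

// axpyIncToRef states, in plain Go, the strided operation the
// AxpyIncTo kernel implements.
func axpyIncToRef(dst []complex64, incDst, idst uintptr, alpha complex64, x, y []complex64, n, incX, incY, ix, iy uintptr) {
	for i := 0; i < int(n); i++ {
		dst[idst] = alpha*x[ix] + y[iy] // one complex multiply-add per step
		ix += incX
		iy += incY
		idst += incDst
	}
}
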
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// MOVSHDUP X3, X2 +#define MOVSHDUP_X3_X2 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xD3 +// MOVSLDUP X3, X3 +#define MOVSLDUP_X3_X3 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xDB +// ADDSUBPS X2, X3 +#define ADDSUBPS_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA + +// MOVSHDUP X5, X4 +#define MOVSHDUP_X5_X4 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xE5 +// MOVSLDUP X5, X5 +#define MOVSLDUP_X5_X5 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xED +// ADDSUBPS X4, X5 +#define ADDSUBPS_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC + +// MOVSHDUP X7, X6 +#define MOVSHDUP_X7_X6 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xF7 +// MOVSLDUP X7, X7 +#define MOVSLDUP_X7_X7 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xFF +// ADDSUBPS X6, X7 +#define ADDSUBPS_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE + +// MOVSHDUP X9, X8 +#define MOVSHDUP_X9_X8 BYTE $0xF3; BYTE $0x45; BYTE $0x0F; BYTE $0x16; BYTE $0xC1 +// MOVSLDUP X9, X9 +#define MOVSLDUP_X9_X9 BYTE $0xF3; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC9 +// ADDSUBPS X8, X9 +#define ADDSUBPS_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8 + +// func AxpyIncTo(dst []complex64, incDst, idst uintptr, alpha complex64, x, y []complex64, n, incX, incY, ix, iy uintptr) +TEXT ·AxpyIncTo(SB), NOSPLIT, $0 + MOVQ dst_base+0(FP), DI // DI = &dst + MOVQ x_base+48(FP), SI // SI = &x + MOVQ y_base+72(FP), DX // DX = &y + MOVQ n+96(FP), CX // CX = n + CMPQ CX, $0 // if n==0 { return } + JE axpyi_end + MOVQ ix+120(FP), R8 // Load the first index + MOVQ iy+128(FP), R9 + MOVQ idst+32(FP), R10 + LEAQ (SI)(R8*8), SI // SI = &(x[ix]) + LEAQ (DX)(R9*8), DX // DX = &(y[iy]) + LEAQ (DI)(R10*8), DI // DI = &(dst[idst]) + MOVQ incX+104(FP), R8 // Incrementors*8 for easy iteration (ADDQ) + SHLQ $3, R8 + MOVQ incY+112(FP), R9 + SHLQ $3, R9 + MOVQ incDst+24(FP), R10 + SHLQ $3, R10 + MOVSD alpha+40(FP), X0 // X0 = { 0, 0, imag(a), real(a) } + MOVAPS X0, X1 + SHUFPS $0x11, X1, X1 // X1 = { 0, 0, real(a), imag(a) } + MOVAPS X0, X10 // Copy X0 and X1 for pipelining + MOVAPS X1, X11 + MOVQ CX, BX + ANDQ $3, CX // CX = n % 4 + SHRQ $2, BX // BX = floor( n / 4 ) + JZ axpyi_tail // if BX == 0 { goto axpyi_tail } + +axpyi_loop: // do { + MOVSD (SI), X3 // X_i = { imag(x[i]), real(x[i]) } + MOVSD (SI)(R8*1), X5 + LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2]) + MOVSD (SI), X7 + MOVSD (SI)(R8*1), X9 + + // X_(i-1) = { imag(x[i]), imag(x[i]) } + MOVSHDUP_X3_X2 + MOVSHDUP_X5_X4 + MOVSHDUP_X7_X6 + MOVSHDUP_X9_X8 + + // X_i = { real(x[i]), real(x[i]) } + MOVSLDUP_X3_X3 + MOVSLDUP_X5_X5 + MOVSLDUP_X7_X7 + MOVSLDUP_X9_X9 + + // X_(i-1) = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + // X_i = { imag(a) * real(x[i]), real(a) * real(x[i]) } + MULPS X1, X2 + MULPS X0, X3 + MULPS X11, X4 + MULPS X10, X5 + MULPS X1, X6 + MULPS X0, X7 + MULPS X11, X8 + MULPS X10, X9 + + // X_i = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]), + // } + ADDSUBPS_X2_X3 + ADDSUBPS_X4_X5 + ADDSUBPS_X6_X7 + ADDSUBPS_X8_X9 + + // X_i = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) } + MOVSD (DX), X2 + MOVSD (DX)(R9*1), X4 + LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2]) + MOVSD (DX), X6 + MOVSD (DX)(R9*1), X8 + ADDPS X2, X3 + ADDPS X4, X5 + ADDPS X6, X7 + ADDPS X8, X9 + + MOVSD X3, (DI) // y[i] = X_i + MOVSD X5, (DI)(R10*1) + LEAQ (DI)(R10*2), DI // DI = &(DI[incDst]) + MOVSD X7, (DI) + MOVSD X9, (DI)(R10*1) + LEAQ (SI)(R8*2), SI // SI = 
&(SI[incX*2]) + LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2]) + LEAQ (DI)(R10*2), DI // DI = &(DI[incDst]) + DECQ BX + JNZ axpyi_loop // } while --BX > 0 + CMPQ CX, $0 // if CX == 0 { return } + JE axpyi_end + +axpyi_tail: + MOVSD (SI), X3 // X_i = { imag(x[i]), real(x[i]) } + MOVSHDUP_X3_X2 // X_(i-1) = { imag(x[i]), imag(x[i]) } + MOVSLDUP_X3_X3 // X_i = { real(x[i]), real(x[i]) } + + // X_i = { imag(a) * real(x[i]), real(a) * real(x[i]) } + // X_(i-1) = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + MULPS X1, X2 + MULPS X0, X3 + + // X_i = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]), + // } + ADDSUBPS_X2_X3 + + // X_i = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) } + MOVSD (DX), X4 + ADDPS X4, X3 + MOVSD X3, (DI) // y[i] = X_i + ADDQ R8, SI // SI += incX + ADDQ R9, DX // DX += incY + ADDQ R10, DI // DI += incDst + LOOP axpyi_tail // } while --CX > 0 + +axpyi_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyunitary_amd64.s new file mode 100644 index 0000000000..71274c92cc --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyunitary_amd64.s @@ -0,0 +1,160 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// MOVSHDUP X3, X2 +#define MOVSHDUP_X3_X2 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xD3 +// MOVSLDUP X3, X3 +#define MOVSLDUP_X3_X3 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xDB +// ADDSUBPS X2, X3 +#define ADDSUBPS_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA + +// MOVSHDUP X5, X4 +#define MOVSHDUP_X5_X4 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xE5 +// MOVSLDUP X5, X5 +#define MOVSLDUP_X5_X5 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xED +// ADDSUBPS X4, X5 +#define ADDSUBPS_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC + +// MOVSHDUP X7, X6 +#define MOVSHDUP_X7_X6 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xF7 +// MOVSLDUP X7, X7 +#define MOVSLDUP_X7_X7 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xFF +// ADDSUBPS X6, X7 +#define ADDSUBPS_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE + +// MOVSHDUP X9, X8 +#define MOVSHDUP_X9_X8 BYTE $0xF3; BYTE $0x45; BYTE $0x0F; BYTE $0x16; BYTE $0xC1 +// MOVSLDUP X9, X9 +#define MOVSLDUP_X9_X9 BYTE $0xF3; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC9 +// ADDSUBPS X8, X9 +#define ADDSUBPS_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8 + +// func AxpyUnitary(alpha complex64, x, y []complex64) +TEXT ·AxpyUnitary(SB), NOSPLIT, $0 + MOVQ x_base+8(FP), SI // SI = &x + MOVQ y_base+32(FP), DI // DI = &y + MOVQ x_len+16(FP), CX // CX = min( len(x), len(y) ) + CMPQ y_len+40(FP), CX + CMOVQLE y_len+40(FP), CX + CMPQ CX, $0 // if CX == 0 { return } + JE caxy_end + PXOR X0, X0 // Clear work registers and cache-align loop + PXOR X1, X1 + MOVSD alpha+0(FP), X0 // X0 = { 0, 0, imag(a), real(a) } + SHUFPD $0, X0, X0 // X0 = { imag(a), real(a), imag(a), real(a) } + MOVAPS X0, X1 + SHUFPS $0x11, X1, X1 // X1 = { real(a), imag(a), real(a), imag(a) } + XORQ AX, AX // i = 0 + MOVQ DI, BX // Align on 16-byte boundary for ADDPS + ANDQ $15, BX // BX = &y & 15 + JZ caxy_no_trim // if BX == 0 { goto caxy_no_trim } + + // Trim first value in unaligned buffer + XORPS X2, X2 // Clear work registers and cache-align loop + XORPS X3, X3 + XORPS X4, X4 + MOVSD 
(SI)(AX*8), X3 // X3 = { imag(x[i]), real(x[i]) } + MOVSHDUP_X3_X2 // X2 = { imag(x[i]), imag(x[i]) } + MOVSLDUP_X3_X3 // X3 = { real(x[i]), real(x[i]) } + MULPS X1, X2 // X2 = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + MULPS X0, X3 // X3 = { imag(a) * real(x[i]), real(a) * real(x[i]) } + + // X3 = { imag(a)*real(x[i]) + real(a)*imag(x[i]), real(a)*real(x[i]) - imag(a)*imag(x[i]) } + ADDSUBPS_X2_X3 + MOVSD (DI)(AX*8), X4 // X3 += y[i] + ADDPS X4, X3 + MOVSD X3, (DI)(AX*8) // y[i] = X3 + INCQ AX // i++ + DECQ CX // --CX + JZ caxy_end // if CX == 0 { return } + +caxy_no_trim: + MOVAPS X0, X10 // Copy X0 and X1 for pipelineing + MOVAPS X1, X11 + MOVQ CX, BX + ANDQ $7, CX // CX = n % 8 + SHRQ $3, BX // BX = floor( n / 8 ) + JZ caxy_tail // if BX == 0 { goto caxy_tail } + +caxy_loop: // do { + // X_i = { imag(x[i]), real(x[i]), imag(x[i+1]), real(x[i+1]) } + MOVUPS (SI)(AX*8), X3 + MOVUPS 16(SI)(AX*8), X5 + MOVUPS 32(SI)(AX*8), X7 + MOVUPS 48(SI)(AX*8), X9 + + // X_(i-1) = { imag(x[i]), imag(x[i]), imag(x[i]+1), imag(x[i]+1) } + MOVSHDUP_X3_X2 + MOVSHDUP_X5_X4 + MOVSHDUP_X7_X6 + MOVSHDUP_X9_X8 + + // X_i = { real(x[i]), real(x[i]), real(x[i+1]), real(x[i+1]) } + MOVSLDUP_X3_X3 + MOVSLDUP_X5_X5 + MOVSLDUP_X7_X7 + MOVSLDUP_X9_X9 + + // X_i = { imag(a) * real(x[i]), real(a) * real(x[i]), + // imag(a) * real(x[i+1]), real(a) * real(x[i+1]) } + // X_(i-1) = { real(a) * imag(x[i]), imag(a) * imag(x[i]), + // real(a) * imag(x[i+1]), imag(a) * imag(x[i+1]) } + MULPS X1, X2 + MULPS X0, X3 + MULPS X11, X4 + MULPS X10, X5 + MULPS X1, X6 + MULPS X0, X7 + MULPS X11, X8 + MULPS X10, X9 + + // X_i = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]), + // imag(result[i+1]): imag(a)*real(x[i+1]) + real(a)*imag(x[i+1]), + // real(result[i+1]): real(a)*real(x[i+1]) - imag(a)*imag(x[i+1]), + // } + ADDSUBPS_X2_X3 + ADDSUBPS_X4_X5 + ADDSUBPS_X6_X7 + ADDSUBPS_X8_X9 + + // X_i = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]), + // imag(result[i+1]) + imag(y[i+1]), real(result[i+1]) + real(y[i+1]) } + ADDPS (DI)(AX*8), X3 + ADDPS 16(DI)(AX*8), X5 + ADDPS 32(DI)(AX*8), X7 + ADDPS 48(DI)(AX*8), X9 + MOVUPS X3, (DI)(AX*8) // y[i:i+1] = X_i + MOVUPS X5, 16(DI)(AX*8) + MOVUPS X7, 32(DI)(AX*8) + MOVUPS X9, 48(DI)(AX*8) + ADDQ $8, AX // i += 8 + DECQ BX // --BX + JNZ caxy_loop // } while BX > 0 + CMPQ CX, $0 // if CX == 0 { return } + JE caxy_end + +caxy_tail: // do { + MOVSD (SI)(AX*8), X3 // X3 = { imag(x[i]), real(x[i]) } + MOVSHDUP_X3_X2 // X2 = { imag(x[i]), imag(x[i]) } + MOVSLDUP_X3_X3 // X3 = { real(x[i]), real(x[i]) } + MULPS X1, X2 // X2 = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + MULPS X0, X3 // X3 = { imag(a) * real(x[i]), real(a) * real(x[i]) } + + // X3 = { imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(a)*real(x[i]) - imag(a)*imag(x[i]) } + ADDSUBPS_X2_X3 + MOVSD (DI)(AX*8), X4 // X3 += y[i] + ADDPS X4, X3 + MOVSD X3, (DI)(AX*8) // y[i] = X3 + INCQ AX // ++i + LOOP caxy_tail // } while --CX > 0 + +caxy_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyunitaryto_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyunitaryto_amd64.s new file mode 100644 index 0000000000..2e80d8ca94 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/axpyunitaryto_amd64.s @@ -0,0 +1,157 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
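
AxpyUnitaryTo below repeats the alignment trim used by AxpyUnitary above: if the y pointer is not 16-byte aligned, one complex64 (8 bytes) is processed scalar-style first, after which the unrolled loop's ADDPS loads from y are aligned. A small Go sketch of that peeling decision (peel is an illustrative name; the kernel performs the test with ANDQ $15):

package sketch

// peel sketches the single-element alignment trim: complex64 values
// are 8 bytes, so when the pointer sits at an odd multiple of 8, one
// scalar step brings it to a 16-byte boundary.
func peel(yAddr uintptr) int {
	if yAddr&15 != 0 { // the ANDQ $15 test in the prologue
		return 1
	}
	return 0
}
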
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// MOVSHDUP X3, X2 +#define MOVSHDUP_X3_X2 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xD3 +// MOVSLDUP X3, X3 +#define MOVSLDUP_X3_X3 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xDB +// ADDSUBPS X2, X3 +#define ADDSUBPS_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA + +// MOVSHDUP X5, X4 +#define MOVSHDUP_X5_X4 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xE5 +// MOVSLDUP X5, X5 +#define MOVSLDUP_X5_X5 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xED +// ADDSUBPS X4, X5 +#define ADDSUBPS_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC + +// MOVSHDUP X7, X6 +#define MOVSHDUP_X7_X6 BYTE $0xF3; BYTE $0x0F; BYTE $0x16; BYTE $0xF7 +// MOVSLDUP X7, X7 +#define MOVSLDUP_X7_X7 BYTE $0xF3; BYTE $0x0F; BYTE $0x12; BYTE $0xFF +// ADDSUBPS X6, X7 +#define ADDSUBPS_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE + +// MOVSHDUP X9, X8 +#define MOVSHDUP_X9_X8 BYTE $0xF3; BYTE $0x45; BYTE $0x0F; BYTE $0x16; BYTE $0xC1 +// MOVSLDUP X9, X9 +#define MOVSLDUP_X9_X9 BYTE $0xF3; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC9 +// ADDSUBPS X8, X9 +#define ADDSUBPS_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8 + +// func AxpyUnitaryTo(dst []complex64, alpha complex64, x, y []complex64) +TEXT ·AxpyUnitaryTo(SB), NOSPLIT, $0 + MOVQ dst_base+0(FP), DI // DI = &dst + MOVQ x_base+32(FP), SI // SI = &x + MOVQ y_base+56(FP), DX // DX = &y + MOVQ x_len+40(FP), CX + CMPQ y_len+64(FP), CX // CX = min( len(x), len(y), len(dst) ) + CMOVQLE y_len+64(FP), CX + CMPQ dst_len+8(FP), CX + CMOVQLE dst_len+8(FP), CX + CMPQ CX, $0 // if CX == 0 { return } + JE caxy_end + MOVSD alpha+24(FP), X0 // X0 = { 0, 0, imag(a), real(a) } + SHUFPD $0, X0, X0 // X0 = { imag(a), real(a), imag(a), real(a) } + MOVAPS X0, X1 + SHUFPS $0x11, X1, X1 // X1 = { real(a), imag(a), real(a), imag(a) } + XORQ AX, AX // i = 0 + MOVQ DX, BX // Align on 16-byte boundary for ADDPS + ANDQ $15, BX // BX = &y & 15 + JZ caxy_no_trim // if BX == 0 { goto caxy_no_trim } + + MOVSD (SI)(AX*8), X3 // X3 = { imag(x[i]), real(x[i]) } + MOVSHDUP_X3_X2 // X2 = { imag(x[i]), imag(x[i]) } + MOVSLDUP_X3_X3 // X3 = { real(x[i]), real(x[i]) } + MULPS X1, X2 // X2 = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + MULPS X0, X3 // X3 = { imag(a) * real(x[i]), real(a) * real(x[i]) } + + // X3 = { imag(a)*real(x[i]) + real(a)*imag(x[i]), real(a)*real(x[i]) - imag(a)*imag(x[i]) } + ADDSUBPS_X2_X3 + MOVSD (DX)(AX*8), X4 // X3 += y[i] + ADDPS X4, X3 + MOVSD X3, (DI)(AX*8) // dst[i] = X3 + INCQ AX // i++ + DECQ CX // --CX + JZ caxy_tail // if BX == 0 { goto caxy_tail } + +caxy_no_trim: + MOVAPS X0, X10 // Copy X0 and X1 for pipelineing + MOVAPS X1, X11 + MOVQ CX, BX + ANDQ $7, CX // CX = n % 8 + SHRQ $3, BX // BX = floor( n / 8 ) + JZ caxy_tail // if BX == 0 { goto caxy_tail } + +caxy_loop: + // X_i = { imag(x[i]), real(x[i]), imag(x[i+1]), real(x[i+1]) } + MOVUPS (SI)(AX*8), X3 + MOVUPS 16(SI)(AX*8), X5 + MOVUPS 32(SI)(AX*8), X7 + MOVUPS 48(SI)(AX*8), X9 + + // X_(i-1) = { imag(x[i]), imag(x[i]), imag(x[i]+1), imag(x[i]+1) } + MOVSHDUP_X3_X2 + MOVSHDUP_X5_X4 + MOVSHDUP_X7_X6 + MOVSHDUP_X9_X8 + + // X_i = { real(x[i]), real(x[i]), real(x[i+1]), real(x[i+1]) } + MOVSLDUP_X3_X3 + MOVSLDUP_X5_X5 + MOVSLDUP_X7_X7 + MOVSLDUP_X9_X9 + + // X_i = { imag(a) * real(x[i]), real(a) * real(x[i]), + // imag(a) * real(x[i+1]), real(a) * real(x[i+1]) } + // X_(i-1) = { real(a) * imag(x[i]), imag(a) * imag(x[i]), + // real(a) * imag(x[i+1]), imag(a) * imag(x[i+1]) } + MULPS X1, X2 + MULPS X0, X3 + MULPS X11, X4 + 
MULPS X10, X5 + MULPS X1, X6 + MULPS X0, X7 + MULPS X11, X8 + MULPS X10, X9 + + // X_i = { + // imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i]), + // imag(result[i+1]): imag(a)*real(x[i+1]) + real(a)*imag(x[i+1]), + // real(result[i+1]): real(a)*real(x[i+1]) - imag(a)*imag(x[i+1]), + // } + ADDSUBPS_X2_X3 + ADDSUBPS_X4_X5 + ADDSUBPS_X6_X7 + ADDSUBPS_X8_X9 + + // X_i = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]), + // imag(result[i+1]) + imag(y[i+1]), real(result[i+1]) + real(y[i+1]) } + ADDPS (DX)(AX*8), X3 + ADDPS 16(DX)(AX*8), X5 + ADDPS 32(DX)(AX*8), X7 + ADDPS 48(DX)(AX*8), X9 + MOVUPS X3, (DI)(AX*8) // y[i:i+1] = X_i + MOVUPS X5, 16(DI)(AX*8) + MOVUPS X7, 32(DI)(AX*8) + MOVUPS X9, 48(DI)(AX*8) + ADDQ $8, AX // i += 8 + DECQ BX // --BX + JNZ caxy_loop // } while BX > 0 + CMPQ CX, $0 // if CX == 0 { return } + JE caxy_end + +caxy_tail: // do { + MOVSD (SI)(AX*8), X3 // X3 = { imag(x[i]), real(x[i]) } + MOVSHDUP_X3_X2 // X2 = { imag(x[i]), imag(x[i]) } + MOVSLDUP_X3_X3 // X3 = { real(x[i]), real(x[i]) } + MULPS X1, X2 // X2 = { real(a) * imag(x[i]), imag(a) * imag(x[i]) } + MULPS X0, X3 // X3 = { imag(a) * real(x[i]), real(a) * real(x[i]) } + + // X3 = { imag(a)*real(x[i]) + real(a)*imag(x[i]), + // real(a)*real(x[i]) - imag(a)*imag(x[i]) } + ADDSUBPS_X2_X3 + MOVSD (DX)(AX*8), X4 // X3 += y[i] + ADDPS X4, X3 + MOVSD X3, (DI)(AX*8) // y[i] = X3 + INCQ AX // ++i + LOOP caxy_tail // } while --CX > 0 + +caxy_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/conj.go b/vendor/gonum.org/v1/gonum/internal/asm/c64/conj.go new file mode 100644 index 0000000000..910e1e5c73 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/conj.go @@ -0,0 +1,7 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package c64 + +func conj(c complex64) complex64 { return complex(real(c), -imag(c)) } diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/doc.go b/vendor/gonum.org/v1/gonum/internal/asm/c64/doc.go new file mode 100644 index 0000000000..35f1b2a26b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package c64 provides complex64 vector primitives. +package c64 // import "gonum.org/v1/gonum/internal/asm/c64" diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/dotcinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c64/dotcinc_amd64.s new file mode 100644 index 0000000000..8efda0bb77 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/dotcinc_amd64.s @@ -0,0 +1,160 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
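
The c64 DotcInc kernel below is the complex64, strided version of the conjugated dot product: imag(x) is negated with a broadcast -1 (NEG1) and the partial products are fused with ADDSUBPS. Stated in Go (dotcRef32 is an illustrative name, not part of this patch):

package sketch

// dotcRef32 states the strided, conjugated reduction DotcInc computes;
// the kernel negates imag(x) with a broadcast -1 rather than a conj
// helper. For example, conj(1+1i)*(1+2i) = 3+1i.
func dotcRef32(x, y []complex64, n, incX, incY, ix, iy uintptr) complex64 {
	var sum complex64
	for i := 0; i < int(n); i++ {
		v := x[ix]
		sum += complex(real(v), -imag(v)) * y[iy]
		ix += incX
		iy += incY
	}
	return sum
}
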
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define MOVSHDUP_X3_X2 LONG $0xD3160FF3 // MOVSHDUP X3, X2 +#define MOVSHDUP_X5_X4 LONG $0xE5160FF3 // MOVSHDUP X5, X4 +#define MOVSHDUP_X7_X6 LONG $0xF7160FF3 // MOVSHDUP X7, X6 +#define MOVSHDUP_X9_X8 LONG $0x160F45F3; BYTE $0xC1 // MOVSHDUP X9, X8 + +#define MOVSLDUP_X3_X3 LONG $0xDB120FF3 // MOVSLDUP X3, X3 +#define MOVSLDUP_X5_X5 LONG $0xED120FF3 // MOVSLDUP X5, X5 +#define MOVSLDUP_X7_X7 LONG $0xFF120FF3 // MOVSLDUP X7, X7 +#define MOVSLDUP_X9_X9 LONG $0x120F45F3; BYTE $0xC9 // MOVSLDUP X9, X9 + +#define ADDSUBPS_X2_X3 LONG $0xDAD00FF2 // ADDSUBPS X2, X3 +#define ADDSUBPS_X4_X5 LONG $0xECD00FF2 // ADDSUBPS X4, X5 +#define ADDSUBPS_X6_X7 LONG $0xFED00FF2 // ADDSUBPS X6, X7 +#define ADDSUBPS_X8_X9 LONG $0xD00F45F2; BYTE $0xC8 // ADDSUBPS X8, X9 + +#define X_PTR SI +#define Y_PTR DI +#define LEN CX +#define TAIL BX +#define SUM X0 +#define P_SUM X1 +#define INC_X R8 +#define INCx3_X R9 +#define INC_Y R10 +#define INCx3_Y R11 +#define NEG1 X15 +#define P_NEG1 X14 + +// func DotcInc(x, y []complex64, n, incX, incY, ix, iy uintptr) (sum complex64) +TEXT ·DotcInc(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y + PXOR SUM, SUM // SUM = 0 + PXOR P_SUM, P_SUM // P_SUM = 0 + MOVQ n+48(FP), LEN // LEN = n + CMPQ LEN, $0 // if LEN == 0 { return } + JE dotc_end + MOVQ ix+72(FP), INC_X + MOVQ iy+80(FP), INC_Y + LEAQ (X_PTR)(INC_X*8), X_PTR // X_PTR = &(X_PTR[ix]) + LEAQ (Y_PTR)(INC_Y*8), Y_PTR // Y_PTR = &(Y_PTR[iy]) + MOVQ incX+56(FP), INC_X // INC_X = incX * sizeof(complex64) + SHLQ $3, INC_X + MOVQ incY+64(FP), INC_Y // INC_Y = incY * sizeof(complex64) + SHLQ $3, INC_Y + MOVSS $(-1.0), NEG1 + SHUFPS $0, NEG1, NEG1 // { -1, -1, -1, -1 } + + MOVQ LEN, TAIL + ANDQ $3, TAIL // TAIL = LEN % 4 + SHRQ $2, LEN // LEN = floor( LEN / 4 ) + JZ dotc_tail // if LEN == 0 { goto dotc_tail } + + MOVUPS NEG1, P_NEG1 // Copy NEG1 for pipelining + LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3 + LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3 + +dotc_loop: // do { + MOVSD (X_PTR), X3 // X_i = { imag(x[i]), real(x[i]) } + MOVSD (X_PTR)(INC_X*1), X5 + MOVSD (X_PTR)(INC_X*2), X7 + MOVSD (X_PTR)(INCx3_X*1), X9 + + // X_(i-1) = { imag(x[i]), imag(x[i]) } + MOVSHDUP_X3_X2 + MOVSHDUP_X5_X4 + MOVSHDUP_X7_X6 + MOVSHDUP_X9_X8 + + // X_i = { real(x[i]), real(x[i]) } + MOVSLDUP_X3_X3 + MOVSLDUP_X5_X5 + MOVSLDUP_X7_X7 + MOVSLDUP_X9_X9 + + // X_(i-1) = { -imag(x[i]), -imag(x[i]) } + MULPS NEG1, X2 + MULPS P_NEG1, X4 + MULPS NEG1, X6 + MULPS P_NEG1, X8 + + // X_j = { imag(y[i]), real(y[i]) } + MOVSD (Y_PTR), X10 + MOVSD (Y_PTR)(INC_Y*1), X11 + MOVSD (Y_PTR)(INC_Y*2), X12 + MOVSD (Y_PTR)(INCx3_Y*1), X13 + + // X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) } + MULPS X10, X3 + MULPS X11, X5 + MULPS X12, X7 + MULPS X13, X9 + + // X_j = { real(y[i]), imag(y[i]) } + SHUFPS $0xB1, X10, X10 + SHUFPS $0xB1, X11, X11 + SHUFPS $0xB1, X12, X12 + SHUFPS $0xB1, X13, X13 + + // X_(i-1) = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) } + MULPS X10, X2 + MULPS X11, X4 + MULPS X12, X6 + MULPS X13, X8 + + // X_i = { + // imag(result[i]): imag(y[i]) * real(x[i]) + real(y[i]) * imag(x[i]), + // real(result[i]): real(y[i]) * real(x[i]) - imag(y[i]) * imag(x[i]) } + ADDSUBPS_X2_X3 + ADDSUBPS_X4_X5 + ADDSUBPS_X6_X7 + ADDSUBPS_X8_X9 + + // SUM += X_i + ADDPS X3, SUM + ADDPS X5, P_SUM + ADDPS X7, SUM + ADDPS X9, P_SUM + + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X*4]) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR // 
Y_PTR = &(Y_PTR[INC_Y*4]) + + DECQ LEN + JNZ dotc_loop // } while --LEN > 0 + + ADDPS P_SUM, SUM // SUM = { P_SUM + SUM } + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE dotc_end + +dotc_tail: // do { + MOVSD (X_PTR), X3 // X_i = { imag(x[i]), real(x[i]) } + MOVSHDUP_X3_X2 // X_(i-1) = { imag(x[i]), imag(x[i]) } + MOVSLDUP_X3_X3 // X_i = { real(x[i]), real(x[i]) } + MULPS NEG1, X2 // X_(i-1) = { -imag(x[i]), imag(x[i]) } + MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]), real(y[i]) } + MULPS X10, X3 // X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) } + SHUFPS $0x1, X10, X10 // X_j = { real(y[i]), imag(y[i]) } + MULPS X10, X2 // X_(i-1) = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) } + + // X_i = { + // imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]), + // real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i]) } + ADDSUBPS_X2_X3 + ADDPS X3, SUM // SUM += X_i + ADDQ INC_X, X_PTR // X_PTR += INC_X + ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y + DECQ TAIL + JNZ dotc_tail // } while --TAIL > 0 + +dotc_end: + MOVSD SUM, sum+88(FP) // return SUM + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/dotcunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c64/dotcunitary_amd64.s new file mode 100644 index 0000000000..78f43eee06 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/dotcunitary_amd64.s @@ -0,0 +1,208 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define MOVSLDUP_XPTR_IDX_8__X3 LONG $0x1C120FF3; BYTE $0xC6 // MOVSLDUP (SI)(AX*8), X3 +#define MOVSLDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF3; WORD $0x10C6 // MOVSLDUP 16(SI)(AX*8), X5 +#define MOVSLDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF3; WORD $0x20C6 // MOVSLDUP 32(SI)(AX*8), X7 +#define MOVSLDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F3; WORD $0xC64C; BYTE $0x30 // MOVSLDUP 48(SI)(AX*8), X9 + +#define MOVSHDUP_XPTR_IDX_8__X2 LONG $0x14160FF3; BYTE $0xC6 // MOVSHDUP (SI)(AX*8), X2 +#define MOVSHDUP_16_XPTR_IDX_8__X4 LONG $0x64160FF3; WORD $0x10C6 // MOVSHDUP 16(SI)(AX*8), X4 +#define MOVSHDUP_32_XPTR_IDX_8__X6 LONG $0x74160FF3; WORD $0x20C6 // MOVSHDUP 32(SI)(AX*8), X6 +#define MOVSHDUP_48_XPTR_IDX_8__X8 LONG $0x160F44F3; WORD $0xC644; BYTE $0x30 // MOVSHDUP 48(SI)(AX*8), X8 + +#define MOVSHDUP_X3_X2 LONG $0xD3160FF3 // MOVSHDUP X3, X2 +#define MOVSLDUP_X3_X3 LONG $0xDB120FF3 // MOVSLDUP X3, X3 + +#define ADDSUBPS_X2_X3 LONG $0xDAD00FF2 // ADDSUBPS X2, X3 +#define ADDSUBPS_X4_X5 LONG $0xECD00FF2 // ADDSUBPS X4, X5 +#define ADDSUBPS_X6_X7 LONG $0xFED00FF2 // ADDSUBPS X6, X7 +#define ADDSUBPS_X8_X9 LONG $0xD00F45F2; BYTE $0xC8 // ADDSUBPS X8, X9 + +#define X_PTR SI +#define Y_PTR DI +#define LEN CX +#define TAIL BX +#define SUM X0 +#define P_SUM X1 +#define IDX AX +#define I_IDX DX +#define NEG1 X15 +#define P_NEG1 X14 + +// func DotcUnitary(x, y []complex64) (sum complex64) +TEXT ·DotcUnitary(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y + PXOR SUM, SUM // SUM = 0 + PXOR P_SUM, P_SUM // P_SUM = 0 + MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) ) + CMPQ y_len+32(FP), LEN + CMOVQLE y_len+32(FP), LEN + CMPQ LEN, $0 // if LEN == 0 { return } + JE dotc_end + XORQ IDX, IDX // i = 0 + MOVSS $(-1.0), NEG1 + SHUFPS $0, NEG1, NEG1 // { -1, -1, -1, -1 } + + MOVQ X_PTR, DX + ANDQ $15, DX // DX = &x & 15 + JZ dotc_aligned // if DX == 0 { goto dotc_aligned } + + MOVSD 
(X_PTR)(IDX*8), X3 // X_i = { imag(x[i]), real(x[i]) } + MOVSHDUP_X3_X2 // X_(i-1) = { imag(x[i]), imag(x[i]) } + MOVSLDUP_X3_X3 // X_i = { real(x[i]), real(x[i]) } + MOVSD (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]), real(y[i]) } + MULPS NEG1, X2 // X_(i-1) = { -imag(x[i]), imag(x[i]) } + MULPS X10, X3 // X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) } + SHUFPS $0x1, X10, X10 // X_j = { real(y[i]), imag(y[i]) } + MULPS X10, X2 // X_(i-1) = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) } + + // X_i = { + // imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]), + // real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i]) } + ADDSUBPS_X2_X3 + + MOVAPS X3, SUM // SUM = X_i + INCQ IDX // IDX++ + DECQ LEN // LEN-- + JZ dotc_ret // if LEN == 0 { goto dotc_ret } + +dotc_aligned: + MOVQ LEN, TAIL + ANDQ $7, TAIL // TAIL = LEN % 8 + SHRQ $3, LEN // LEN = floor( LEN / 8 ) + JZ dotc_tail // if LEN == 0 { return } + MOVUPS NEG1, P_NEG1 // Copy NEG1 for pipelining + +dotc_loop: // do { + MOVSLDUP_XPTR_IDX_8__X3 // X_i = { real(x[i]), real(x[i]), real(x[i+1]), real(x[i+1]) } + MOVSLDUP_16_XPTR_IDX_8__X5 + MOVSLDUP_32_XPTR_IDX_8__X7 + MOVSLDUP_48_XPTR_IDX_8__X9 + + MOVSHDUP_XPTR_IDX_8__X2 // X_(i-1) = { imag(x[i]), imag(x[i]), imag(x[i+1]), imag(x[i+1]) } + MOVSHDUP_16_XPTR_IDX_8__X4 + MOVSHDUP_32_XPTR_IDX_8__X6 + MOVSHDUP_48_XPTR_IDX_8__X8 + + // X_j = { imag(y[i]), real(y[i]), imag(y[i+1]), real(y[i+1]) } + MOVUPS (Y_PTR)(IDX*8), X10 + MOVUPS 16(Y_PTR)(IDX*8), X11 + MOVUPS 32(Y_PTR)(IDX*8), X12 + MOVUPS 48(Y_PTR)(IDX*8), X13 + + // X_(i-1) = { -imag(x[i]), -imag(x[i]), -imag(x[i]+1), -imag(x[i]+1) } + MULPS NEG1, X2 + MULPS P_NEG1, X4 + MULPS NEG1, X6 + MULPS P_NEG1, X8 + + // X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]), + // imag(y[i+1]) * real(x[i+1]), real(y[i+1]) * real(x[i+1]) } + MULPS X10, X3 + MULPS X11, X5 + MULPS X12, X7 + MULPS X13, X9 + + // X_j = { real(y[i]), imag(y[i]), real(y[i+1]), imag(y[i+1]) } + SHUFPS $0xB1, X10, X10 + SHUFPS $0xB1, X11, X11 + SHUFPS $0xB1, X12, X12 + SHUFPS $0xB1, X13, X13 + + // X_(i-1) = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]), + // real(y[i+1]) * imag(x[i+1]), imag(y[i+1]) * imag(x[i+1]) } + MULPS X10, X2 + MULPS X11, X4 + MULPS X12, X6 + MULPS X13, X8 + + // X_i = { + // imag(result[i]): imag(y[i]) * real(x[i]) + real(y[i]) * imag(x[i]), + // real(result[i]): real(y[i]) * real(x[i]) - imag(y[i]) * imag(x[i]), + // imag(result[i+1]): imag(y[i+1]) * real(x[i+1]) + real(y[i+1]) * imag(x[i+1]), + // real(result[i+1]): real(y[i+1]) * real(x[i+1]) - imag(y[i+1]) * imag(x[i+1]), + // } + ADDSUBPS_X2_X3 + ADDSUBPS_X4_X5 + ADDSUBPS_X6_X7 + ADDSUBPS_X8_X9 + + // SUM += X_i + ADDPS X3, SUM + ADDPS X5, P_SUM + ADDPS X7, SUM + ADDPS X9, P_SUM + + ADDQ $8, IDX // IDX += 8 + DECQ LEN + JNZ dotc_loop // } while --LEN > 0 + + ADDPS SUM, P_SUM // P_SUM = { P_SUM[1] + SUM[1], P_SUM[0] + SUM[0] } + XORPS SUM, SUM // SUM = 0 + + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE dotc_end + +dotc_tail: + MOVQ TAIL, LEN + SHRQ $1, LEN // LEN = floor( LEN / 2 ) + JZ dotc_tail_one // if LEN == 0 { goto dotc_tail_one } + +dotc_tail_two: // do { + MOVSLDUP_XPTR_IDX_8__X3 // X_i = { real(x[i]), real(x[i]), real(x[i+1]), real(x[i+1]) } + MOVSHDUP_XPTR_IDX_8__X2 // X_(i-1) = { imag(x[i]), imag(x[i]), imag(x[i]+1), imag(x[i]+1) } + MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]), real(y[i]) } + MULPS NEG1, X2 // X_(i-1) = { -imag(x[i]), imag(x[i]) } + MULPS X10, X3 // X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) } + SHUFPS 
$0xB1, X10, X10 // X_j = { real(y[i]), imag(y[i]) } + MULPS X10, X2 // X_(i-1) = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) } + + // X_i = { + // imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]), + // real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i]) } + ADDSUBPS_X2_X3 + + ADDPS X3, SUM // SUM += X_i + + ADDQ $2, IDX // IDX += 2 + DECQ LEN + JNZ dotc_tail_two // } while --LEN > 0 + + ADDPS SUM, P_SUM // P_SUM = { P_SUM[1] + SUM[1], P_SUM[0] + SUM[0] } + XORPS SUM, SUM // SUM = 0 + + ANDQ $1, TAIL + JZ dotc_end + +dotc_tail_one: + MOVSD (X_PTR)(IDX*8), X3 // X_i = { imag(x[i]), real(x[i]) } + MOVSHDUP_X3_X2 // X_(i-1) = { imag(x[i]), imag(x[i]) } + MOVSLDUP_X3_X3 // X_i = { real(x[i]), real(x[i]) } + MOVSD (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]), real(y[i]) } + MULPS NEG1, X2 // X_(i-1) = { -imag(x[i]), imag(x[i]) } + MULPS X10, X3 // X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) } + SHUFPS $0x1, X10, X10 // X_j = { real(y[i]), imag(y[i]) } + MULPS X10, X2 // X_(i-1) = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) } + + // X_i = { + // imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]), + // real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i]) } + ADDSUBPS_X2_X3 + + ADDPS X3, SUM // SUM += X_i + +dotc_end: + ADDPS P_SUM, SUM // SUM = { P_SUM[0] + SUM[0] } + MOVHLPS P_SUM, P_SUM // P_SUM = { P_SUM[1], P_SUM[1] } + ADDPS P_SUM, SUM // SUM = { P_SUM[1] + SUM[0] } + +dotc_ret: + MOVSD SUM, sum+48(FP) // return SUM + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/dotuinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c64/dotuinc_amd64.s new file mode 100644 index 0000000000..3dc2e144a8 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/dotuinc_amd64.s @@ -0,0 +1,148 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
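+
+// DotuInc below is the strided, unconjugated complex64 dot product. The
+// ix and iy arguments are element offsets and incX and incY element
+// strides; the kernel scales both by sizeof(complex64) = 8 with SHLQ $3.
+// Its contract, mirroring the stub documentation later in this patch, is:
+//
+//	for i := 0; i < int(n); i++ {
+//		sum += y[iy] * x[ix]
+//		ix += incX
+//		iy += incY
+//	}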
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define MOVSHDUP_X3_X2 LONG $0xD3160FF3 // MOVSHDUP X3, X2 +#define MOVSHDUP_X5_X4 LONG $0xE5160FF3 // MOVSHDUP X5, X4 +#define MOVSHDUP_X7_X6 LONG $0xF7160FF3 // MOVSHDUP X7, X6 +#define MOVSHDUP_X9_X8 LONG $0x160F45F3; BYTE $0xC1 // MOVSHDUP X9, X8 + +#define MOVSLDUP_X3_X3 LONG $0xDB120FF3 // MOVSLDUP X3, X3 +#define MOVSLDUP_X5_X5 LONG $0xED120FF3 // MOVSLDUP X5, X5 +#define MOVSLDUP_X7_X7 LONG $0xFF120FF3 // MOVSLDUP X7, X7 +#define MOVSLDUP_X9_X9 LONG $0x120F45F3; BYTE $0xC9 // MOVSLDUP X9, X9 + +#define ADDSUBPS_X2_X3 LONG $0xDAD00FF2 // ADDSUBPS X2, X3 +#define ADDSUBPS_X4_X5 LONG $0xECD00FF2 // ADDSUBPS X4, X5 +#define ADDSUBPS_X6_X7 LONG $0xFED00FF2 // ADDSUBPS X6, X7 +#define ADDSUBPS_X8_X9 LONG $0xD00F45F2; BYTE $0xC8 // ADDSUBPS X8, X9 + +#define X_PTR SI +#define Y_PTR DI +#define LEN CX +#define TAIL BX +#define SUM X0 +#define P_SUM X1 +#define INC_X R8 +#define INCx3_X R9 +#define INC_Y R10 +#define INCx3_Y R11 + +// func DotuInc(x, y []complex64, n, incX, incY, ix, iy uintptr) (sum complex64) +TEXT ·DotuInc(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y + PXOR SUM, SUM // SUM = 0 + PXOR P_SUM, P_SUM // P_SUM = 0 + MOVQ n+48(FP), LEN // LEN = n + CMPQ LEN, $0 // if LEN == 0 { return } + JE dotu_end + MOVQ ix+72(FP), INC_X + MOVQ iy+80(FP), INC_Y + LEAQ (X_PTR)(INC_X*8), X_PTR // X_PTR = &(X_PTR[ix]) + LEAQ (Y_PTR)(INC_Y*8), Y_PTR // Y_PTR = &(Y_PTR[iy]) + MOVQ incX+56(FP), INC_X // INC_X = incX * sizeof(complex64) + SHLQ $3, INC_X + MOVQ incY+64(FP), INC_Y // INC_Y = incY * sizeof(complex64) + SHLQ $3, INC_Y + + MOVQ LEN, TAIL + ANDQ $3, TAIL // TAIL = LEN % 4 + SHRQ $2, LEN // LEN = floor( LEN / 4 ) + JZ dotu_tail // if TAIL == 0 { goto dotu_tail } + + LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3 + LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3 + +dotu_loop: // do { + MOVSD (X_PTR), X3 // X_i = { imag(x[i]), real(x[i]) } + MOVSD (X_PTR)(INC_X*1), X5 + MOVSD (X_PTR)(INC_X*2), X7 + MOVSD (X_PTR)(INCx3_X*1), X9 + + // X_(i-1) = { imag(x[i]), imag(x[i]) } + MOVSHDUP_X3_X2 + MOVSHDUP_X5_X4 + MOVSHDUP_X7_X6 + MOVSHDUP_X9_X8 + + // X_i = { real(x[i]), real(x[i]) } + MOVSLDUP_X3_X3 + MOVSLDUP_X5_X5 + MOVSLDUP_X7_X7 + MOVSLDUP_X9_X9 + + // X_j = { imag(y[i]), real(y[i]) } + MOVSD (Y_PTR), X10 + MOVSD (Y_PTR)(INC_Y*1), X11 + MOVSD (Y_PTR)(INC_Y*2), X12 + MOVSD (Y_PTR)(INCx3_Y*1), X13 + + // X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) } + MULPS X10, X3 + MULPS X11, X5 + MULPS X12, X7 + MULPS X13, X9 + + // X_j = { real(y[i]), imag(y[i]) } + SHUFPS $0xB1, X10, X10 + SHUFPS $0xB1, X11, X11 + SHUFPS $0xB1, X12, X12 + SHUFPS $0xB1, X13, X13 + + // X_(i-1) = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) } + MULPS X10, X2 + MULPS X11, X4 + MULPS X12, X6 + MULPS X13, X8 + + // X_i = { + // imag(result[i]): imag(y[i]) * real(x[i]) + real(y[i]) * imag(x[i]), + // real(result[i]): real(y[i]) * real(x[i]) - imag(y[i]) * imag(x[i]) } + ADDSUBPS_X2_X3 + ADDSUBPS_X4_X5 + ADDSUBPS_X6_X7 + ADDSUBPS_X8_X9 + + // SUM += X_i + ADDPS X3, SUM + ADDPS X5, P_SUM + ADDPS X7, SUM + ADDPS X9, P_SUM + + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X*4]) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y*4]) + + DECQ LEN + JNZ dotu_loop // } while --LEN > 0 + + ADDPS P_SUM, SUM // SUM = { P_SUM + SUM } + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE dotu_end + +dotu_tail: // do { + MOVSD (X_PTR), X3 // X_i = { imag(x[i]), real(x[i]) } + MOVSHDUP_X3_X2 
// X_(i-1) = { imag(x[i]), imag(x[i]) } + MOVSLDUP_X3_X3 // X_i = { real(x[i]), real(x[i]) } + MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]), real(y[i]) } + MULPS X10, X3 // X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) } + SHUFPS $0x1, X10, X10 // X_j = { real(y[i]), imag(y[i]) } + MULPS X10, X2 // X_(i-1) = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) } + + // X_i = { + // imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]), + // real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i]) } + ADDSUBPS_X2_X3 + ADDPS X3, SUM // SUM += X_i + ADDQ INC_X, X_PTR // X_PTR += INC_X + ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y + DECQ TAIL + JNZ dotu_tail // } while --TAIL > 0 + +dotu_end: + MOVSD SUM, sum+88(FP) // return SUM + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/dotuunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/c64/dotuunitary_amd64.s new file mode 100644 index 0000000000..f11c6de78f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/dotuunitary_amd64.s @@ -0,0 +1,197 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define MOVSLDUP_XPTR_IDX_8__X3 LONG $0x1C120FF3; BYTE $0xC6 // MOVSLDUP (SI)(AX*8), X3 +#define MOVSLDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF3; WORD $0x10C6 // MOVSLDUP 16(SI)(AX*8), X5 +#define MOVSLDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF3; WORD $0x20C6 // MOVSLDUP 32(SI)(AX*8), X7 +#define MOVSLDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F3; WORD $0xC64C; BYTE $0x30 // MOVSLDUP 48(SI)(AX*8), X9 + +#define MOVSHDUP_XPTR_IDX_8__X2 LONG $0x14160FF3; BYTE $0xC6 // MOVSHDUP (SI)(AX*8), X2 +#define MOVSHDUP_16_XPTR_IDX_8__X4 LONG $0x64160FF3; WORD $0x10C6 // MOVSHDUP 16(SI)(AX*8), X4 +#define MOVSHDUP_32_XPTR_IDX_8__X6 LONG $0x74160FF3; WORD $0x20C6 // MOVSHDUP 32(SI)(AX*8), X6 +#define MOVSHDUP_48_XPTR_IDX_8__X8 LONG $0x160F44F3; WORD $0xC644; BYTE $0x30 // MOVSHDUP 48(SI)(AX*8), X8 + +#define MOVSHDUP_X3_X2 LONG $0xD3160FF3 // MOVSHDUP X3, X2 +#define MOVSLDUP_X3_X3 LONG $0xDB120FF3 // MOVSLDUP X3, X3 + +#define ADDSUBPS_X2_X3 LONG $0xDAD00FF2 // ADDSUBPS X2, X3 +#define ADDSUBPS_X4_X5 LONG $0xECD00FF2 // ADDSUBPS X4, X5 +#define ADDSUBPS_X6_X7 LONG $0xFED00FF2 // ADDSUBPS X6, X7 +#define ADDSUBPS_X8_X9 LONG $0xD00F45F2; BYTE $0xC8 // ADDSUBPS X8, X9 + +#define X_PTR SI +#define Y_PTR DI +#define LEN CX +#define TAIL BX +#define SUM X0 +#define P_SUM X1 +#define IDX AX +#define I_IDX DX +#define NEG1 X15 +#define P_NEG1 X14 + +// func DotuUnitary(x, y []complex64) (sum complex64) +TEXT ·DotuUnitary(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y + PXOR SUM, SUM // SUM = 0 + PXOR P_SUM, P_SUM // P_SUM = 0 + MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) ) + CMPQ y_len+32(FP), LEN + CMOVQLE y_len+32(FP), LEN + CMPQ LEN, $0 // if LEN == 0 { return } + JE dotu_end + XORQ IDX, IDX // IDX = 0 + + MOVQ X_PTR, DX + ANDQ $15, DX // DX = &x & 15 + JZ dotu_aligned // if DX == 0 { goto dotu_aligned } + + MOVSD (X_PTR)(IDX*8), X3 // X_i = { imag(x[i]), real(x[i]) } + MOVSHDUP_X3_X2 // X_(i-1) = { imag(x[i]), imag(x[i]) } + MOVSLDUP_X3_X3 // X_i = { real(x[i]), real(x[i]) } + MOVSD (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]), real(y[i]) } + MULPS X10, X3 // X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) } + SHUFPS $0x1, X10, X10 // X_j = { real(y[i]), imag(y[i]) } + MULPS X10, X2 // X_(i-1) = { real(y[i]) 
* imag(x[i]), imag(y[i]) * imag(x[i]) }
+
+	// X_i = {
+	//	imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]),
+	//	real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i]) }
+	ADDSUBPS_X2_X3
+
+	MOVAPS X3, SUM // SUM = X_i
+	INCQ IDX       // IDX++
+	DECQ LEN       // LEN--
+	JZ   dotu_end  // if LEN == 0 { goto dotu_end }
+
+dotu_aligned:
+	MOVQ LEN, TAIL
+	ANDQ $7, TAIL  // TAIL = LEN % 8
+	SHRQ $3, LEN   // LEN = floor( LEN / 8 )
+	JZ   dotu_tail // if LEN == 0 { goto dotu_tail }
+	PXOR P_SUM, P_SUM
+
+dotu_loop: // do {
+	MOVSLDUP_XPTR_IDX_8__X3 // X_i = { real(x[i]), real(x[i]), real(x[i+1]), real(x[i+1]) }
+	MOVSLDUP_16_XPTR_IDX_8__X5
+	MOVSLDUP_32_XPTR_IDX_8__X7
+	MOVSLDUP_48_XPTR_IDX_8__X9
+
+	MOVSHDUP_XPTR_IDX_8__X2 // X_(i-1) = { imag(x[i]), imag(x[i]), imag(x[i+1]), imag(x[i+1]) }
+	MOVSHDUP_16_XPTR_IDX_8__X4
+	MOVSHDUP_32_XPTR_IDX_8__X6
+	MOVSHDUP_48_XPTR_IDX_8__X8
+
+	// X_j = { imag(y[i]), real(y[i]), imag(y[i+1]), real(y[i+1]) }
+	MOVUPS (Y_PTR)(IDX*8), X10
+	MOVUPS 16(Y_PTR)(IDX*8), X11
+	MOVUPS 32(Y_PTR)(IDX*8), X12
+	MOVUPS 48(Y_PTR)(IDX*8), X13
+
+	// X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]),
+	//         imag(y[i+1]) * real(x[i+1]), real(y[i+1]) * real(x[i+1]) }
+	MULPS X10, X3
+	MULPS X11, X5
+	MULPS X12, X7
+	MULPS X13, X9
+
+	// X_j = { real(y[i]), imag(y[i]), real(y[i+1]), imag(y[i+1]) }
+	SHUFPS $0xB1, X10, X10
+	SHUFPS $0xB1, X11, X11
+	SHUFPS $0xB1, X12, X12
+	SHUFPS $0xB1, X13, X13
+
+	// X_(i-1) = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]),
+	//            real(y[i+1]) * imag(x[i+1]), imag(y[i+1]) * imag(x[i+1]) }
+	MULPS X10, X2
+	MULPS X11, X4
+	MULPS X12, X6
+	MULPS X13, X8
+
+	// X_i = {
+	//	imag(result[i]):   imag(y[i]) * real(x[i]) + real(y[i]) * imag(x[i]),
+	//	real(result[i]):   real(y[i]) * real(x[i]) - imag(y[i]) * imag(x[i]),
+	//	imag(result[i+1]): imag(y[i+1]) * real(x[i+1]) + real(y[i+1]) * imag(x[i+1]),
+	//	real(result[i+1]): real(y[i+1]) * real(x[i+1]) - imag(y[i+1]) * imag(x[i+1]),
+	// }
+	ADDSUBPS_X2_X3
+	ADDSUBPS_X4_X5
+	ADDSUBPS_X6_X7
+	ADDSUBPS_X8_X9
+
+	// SUM += X_i
+	ADDPS X3, SUM
+	ADDPS X5, P_SUM
+	ADDPS X7, SUM
+	ADDPS X9, P_SUM
+
+	ADDQ $8, IDX // IDX += 8
+	DECQ LEN
+	JNZ  dotu_loop // } while --LEN > 0
+
+	ADDPS SUM, P_SUM // P_SUM = { P_SUM[1] + SUM[1], P_SUM[0] + SUM[0] }
+	XORPS SUM, SUM   // SUM = 0
+
+	CMPQ TAIL, $0 // if TAIL == 0 { return }
+	JE   dotu_end
+
+dotu_tail:
+	MOVQ TAIL, LEN
+	SHRQ $1, LEN       // LEN = floor( LEN / 2 )
+	JZ   dotu_tail_one // if LEN == 0 { goto dotu_tail_one }
+
+dotu_tail_two: // do {
+	MOVSLDUP_XPTR_IDX_8__X3 // X_i = { real(x[i]), real(x[i]), real(x[i+1]), real(x[i+1]) }
+	MOVSHDUP_XPTR_IDX_8__X2 // X_(i-1) = { imag(x[i]), imag(x[i]), imag(x[i+1]), imag(x[i+1]) }
+	MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]), real(y[i]) }
+	MULPS X10, X3              // X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) }
+	SHUFPS $0xB1, X10, X10     // X_j = { real(y[i]), imag(y[i]) }
+	MULPS X10, X2              // X_(i-1) = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) }
+
+	// X_i = {
+	//	imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]),
+	//	real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i]) }
+	ADDSUBPS_X2_X3
+
+	ADDPS X3, SUM // SUM += X_i
+
+	ADDQ $2, IDX // IDX += 2
+	DECQ LEN
+	JNZ  dotu_tail_two // } while --LEN > 0
+
+	ADDPS SUM, P_SUM // P_SUM = { P_SUM[1] + SUM[1], P_SUM[0] + SUM[0] }
+	XORPS SUM, SUM   // SUM = 0
+
+	ANDQ $1, TAIL
+	JZ   dotu_end
+
+dotu_tail_one:
+	MOVSD (X_PTR)(IDX*8), X3 // X_i = { imag(x[i]), real(x[i]) }
+	MOVSHDUP_X3_X2           // X_(i-1) = { imag(x[i]), imag(x[i]) }
+
MOVSLDUP_X3_X3 // X_i = { real(x[i]), real(x[i]) } + MOVSD (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]), real(y[i]) } + MULPS X10, X3 // X_i = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) } + SHUFPS $0x1, X10, X10 // X_j = { real(y[i]), imag(y[i]) } + MULPS X10, X2 // X_(i-1) = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) } + + // X_i = { + // imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]), + // real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i]) } + ADDSUBPS_X2_X3 + + ADDPS X3, SUM // SUM += X_i + +dotu_end: + ADDPS P_SUM, SUM // SUM = { P_SUM[0] + SUM[0] } + MOVHLPS P_SUM, P_SUM // P_SUM = { P_SUM[1], P_SUM[1] } + ADDPS P_SUM, SUM // SUM = { P_SUM[1] + SUM[0] } + +dotu_ret: + MOVSD SUM, sum+48(FP) // return SUM + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/scal.go b/vendor/gonum.org/v1/gonum/internal/asm/c64/scal.go new file mode 100644 index 0000000000..6db0aa36f3 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/scal.go @@ -0,0 +1,85 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package c64 + +// ScalUnitary is +// +// for i := range x { +// x[i] *= alpha +// } +func ScalUnitary(alpha complex64, x []complex64) { + for i := range x { + x[i] *= alpha + } +} + +// ScalUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha * v +// } +func ScalUnitaryTo(dst []complex64, alpha complex64, x []complex64) { + for i, v := range x { + dst[i] = alpha * v + } +} + +// ScalInc is +// +// var ix uintptr +// for i := 0; i < int(n); i++ { +// x[ix] *= alpha +// ix += incX +// } +func ScalInc(alpha complex64, x []complex64, n, incX uintptr) { + var ix uintptr + for i := 0; i < int(n); i++ { + x[ix] *= alpha + ix += incX + } +} + +// ScalIncTo is +// +// var idst, ix uintptr +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha * x[ix] +// ix += incX +// idst += incDst +// } +func ScalIncTo(dst []complex64, incDst uintptr, alpha complex64, x []complex64, n, incX uintptr) { + var idst, ix uintptr + for i := 0; i < int(n); i++ { + dst[idst] = alpha * x[ix] + ix += incX + idst += incDst + } +} + +// SscalUnitary is +// +// for i, v := range x { +// x[i] = complex(real(v)*alpha, imag(v)*alpha) +// } +func SscalUnitary(alpha float32, x []complex64) { + for i, v := range x { + x[i] = complex(real(v)*alpha, imag(v)*alpha) + } +} + +// SscalInc is +// +// var ix uintptr +// for i := 0; i < int(n); i++ { +// x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha) +// ix += inc +// } +func SscalInc(alpha float32, x []complex64, n, inc uintptr) { + var ix uintptr + for i := 0; i < int(n); i++ { + x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha) + ix += inc + } +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/stubs.go b/vendor/gonum.org/v1/gonum/internal/asm/c64/stubs.go new file mode 100644 index 0000000000..0aa626e141 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/stubs.go @@ -0,0 +1,180 @@ +// Copyright ©2020 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
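+
+// This file holds the kernels that appear to have no assembly version on
+// any platform, so they build everywhere. The L2 norm routines below use
+// scaled accumulation in the style of the reference BLAS SNRM2, so the
+// squaring never overflows or underflows prematurely: whenever |v|
+// exceeds the running scale they renormalize with
+//
+//	s := scale / absxi
+//	sumSquares = 1 + sumSquares*s*s
+//	scale = absxi
+//
+// and return scale * sqrt(sumSquares) at the end.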
+ +package c64 + +import ( + "gonum.org/v1/gonum/internal/cmplx64" + "gonum.org/v1/gonum/internal/math32" +) + +// Add is +// +// for i, v := range s { +// dst[i] += v +// } +func Add(dst, s []complex64) { + for i, v := range s { + dst[i] += v + } +} + +// AddConst is +// +// for i := range x { +// x[i] += alpha +// } +func AddConst(alpha complex64, x []complex64) { + for i := range x { + x[i] += alpha + } +} + +// CumSum is +// +// if len(s) == 0 { +// return dst +// } +// dst[0] = s[0] +// for i, v := range s[1:] { +// dst[i+1] = dst[i] + v +// } +// return dst +func CumSum(dst, s []complex64) []complex64 { + if len(s) == 0 { + return dst + } + dst[0] = s[0] + for i, v := range s[1:] { + dst[i+1] = dst[i] + v + } + return dst +} + +// CumProd is +// +// if len(s) == 0 { +// return dst +// } +// dst[0] = s[0] +// for i, v := range s[1:] { +// dst[i+1] = dst[i] * v +// } +// return dst +func CumProd(dst, s []complex64) []complex64 { + if len(s) == 0 { + return dst + } + dst[0] = s[0] + for i, v := range s[1:] { + dst[i+1] = dst[i] * v + } + return dst +} + +// Div is +// +// for i, v := range s { +// dst[i] /= v +// } +func Div(dst, s []complex64) { + for i, v := range s { + dst[i] /= v + } +} + +// DivTo is +// +// for i, v := range s { +// dst[i] = v / t[i] +// } +// return dst +func DivTo(dst, s, t []complex64) []complex64 { + for i, v := range s { + dst[i] = v / t[i] + } + return dst +} + +// DotUnitary is +// +// for i, v := range x { +// sum += conj(v) * y[i] +// } +// return sum +func DotUnitary(x, y []complex64) (sum complex64) { + for i, v := range x { + sum += cmplx64.Conj(v) * y[i] + } + return sum +} + +// L2DistanceUnitary returns the L2-norm of x-y. +func L2DistanceUnitary(x, y []complex64) (norm float32) { + var scale float32 + sumSquares := float32(1.0) + for i, v := range x { + v -= y[i] + if v == 0 { + continue + } + absxi := cmplx64.Abs(v) + if math32.IsNaN(absxi) { + return math32.NaN() + } + if scale < absxi { + s := scale / absxi + sumSquares = 1 + sumSquares*s*s + scale = absxi + } else { + s := absxi / scale + sumSquares += s * s + } + } + if math32.IsInf(scale, 1) { + return math32.Inf(1) + } + return scale * math32.Sqrt(sumSquares) +} + +// L2NormUnitary returns the L2-norm of x. +func L2NormUnitary(x []complex64) (norm float32) { + var scale float32 + sumSquares := float32(1.0) + for _, v := range x { + if v == 0 { + continue + } + absxi := cmplx64.Abs(v) + if math32.IsNaN(absxi) { + return math32.NaN() + } + if scale < absxi { + s := scale / absxi + sumSquares = 1 + sumSquares*s*s + scale = absxi + } else { + s := absxi / scale + sumSquares += s * s + } + } + if math32.IsInf(scale, 1) { + return math32.Inf(1) + } + return scale * math32.Sqrt(sumSquares) +} + +// Sum is +// +// var sum complex64 +// for i := range x { +// sum += x[i] +// } +func Sum(x []complex64) complex64 { + var sum complex64 + for _, v := range x { + sum += v + } + return sum +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/stubs_amd64.go b/vendor/gonum.org/v1/gonum/internal/asm/c64/stubs_amd64.go new file mode 100644 index 0000000000..71367b016f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/stubs_amd64.go @@ -0,0 +1,77 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
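+
+// The declarations in this file have no Go bodies: under the build
+// constraints just below, the linker resolves each name against the
+// matching TEXT ·Name(SB) symbol in this package's *_amd64.s files. The
+// commented loop above each declaration is the contract the assembly
+// implements; for example, AxpyUnitary promises exactly
+//
+//	for i, v := range x {
+//		y[i] += alpha * v
+//	}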
+ +//go:build !noasm && !gccgo && !safe +// +build !noasm,!gccgo,!safe + +package c64 + +// AxpyUnitary is +// +// for i, v := range x { +// y[i] += alpha * v +// } +func AxpyUnitary(alpha complex64, x, y []complex64) + +// AxpyUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha*v + y[i] +// } +func AxpyUnitaryTo(dst []complex64, alpha complex64, x, y []complex64) + +// AxpyInc is +// +// for i := 0; i < int(n); i++ { +// y[iy] += alpha * x[ix] +// ix += incX +// iy += incY +// } +func AxpyInc(alpha complex64, x, y []complex64, n, incX, incY, ix, iy uintptr) + +// AxpyIncTo is +// +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha*x[ix] + y[iy] +// ix += incX +// iy += incY +// idst += incDst +// } +func AxpyIncTo(dst []complex64, incDst, idst uintptr, alpha complex64, x, y []complex64, n, incX, incY, ix, iy uintptr) + +// DotcUnitary is +// +// for i, v := range x { +// sum += y[i] * conj(v) +// } +// return sum +func DotcUnitary(x, y []complex64) (sum complex64) + +// DotcInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * conj(x[ix]) +// ix += incX +// iy += incY +// } +// return sum +func DotcInc(x, y []complex64, n, incX, incY, ix, iy uintptr) (sum complex64) + +// DotuUnitary is +// +// for i, v := range x { +// sum += y[i] * v +// } +// return sum +func DotuUnitary(x, y []complex64) (sum complex64) + +// DotuInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * x[ix] +// ix += incX +// iy += incY +// } +// return sum +func DotuInc(x, y []complex64, n, incX, incY, ix, iy uintptr) (sum complex64) diff --git a/vendor/gonum.org/v1/gonum/internal/asm/c64/stubs_noasm.go b/vendor/gonum.org/v1/gonum/internal/asm/c64/stubs_noasm.go new file mode 100644 index 0000000000..0d79b24fc8 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/c64/stubs_noasm.go @@ -0,0 +1,122 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
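+
+// This file is the portable fallback: the constraint just below selects
+// it on non-amd64 targets or when any of the noasm, gccgo, or safe build
+// tags is set (in the old // +build syntax, spaces mean OR and commas
+// mean AND). For example, the pure-Go kernels can be forced with:
+//
+//	go build -tags noasm ./...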
+ +//go:build !amd64 || noasm || gccgo || safe +// +build !amd64 noasm gccgo safe + +package c64 + +// AxpyUnitary is +// +// for i, v := range x { +// y[i] += alpha * v +// } +func AxpyUnitary(alpha complex64, x, y []complex64) { + for i, v := range x { + y[i] += alpha * v + } +} + +// AxpyUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha*v + y[i] +// } +func AxpyUnitaryTo(dst []complex64, alpha complex64, x, y []complex64) { + for i, v := range x { + dst[i] = alpha*v + y[i] + } +} + +// AxpyInc is +// +// for i := 0; i < int(n); i++ { +// y[iy] += alpha * x[ix] +// ix += incX +// iy += incY +// } +func AxpyInc(alpha complex64, x, y []complex64, n, incX, incY, ix, iy uintptr) { + for i := 0; i < int(n); i++ { + y[iy] += alpha * x[ix] + ix += incX + iy += incY + } +} + +// AxpyIncTo is +// +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha*x[ix] + y[iy] +// ix += incX +// iy += incY +// idst += incDst +// } +func AxpyIncTo(dst []complex64, incDst, idst uintptr, alpha complex64, x, y []complex64, n, incX, incY, ix, iy uintptr) { + for i := 0; i < int(n); i++ { + dst[idst] = alpha*x[ix] + y[iy] + ix += incX + iy += incY + idst += incDst + } +} + +// DotcUnitary is +// +// for i, v := range x { +// sum += y[i] * conj(v) +// } +// return sum +func DotcUnitary(x, y []complex64) (sum complex64) { + for i, v := range x { + sum += y[i] * conj(v) + } + return sum +} + +// DotcInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * conj(x[ix]) +// ix += incX +// iy += incY +// } +// return sum +func DotcInc(x, y []complex64, n, incX, incY, ix, iy uintptr) (sum complex64) { + for i := 0; i < int(n); i++ { + sum += y[iy] * conj(x[ix]) + ix += incX + iy += incY + } + return sum +} + +// DotuUnitary is +// +// for i, v := range x { +// sum += y[i] * v +// } +// return sum +func DotuUnitary(x, y []complex64) (sum complex64) { + for i, v := range x { + sum += y[i] * v + } + return sum +} + +// DotuInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * x[ix] +// ix += incX +// iy += incY +// } +// return sum +func DotuInc(x, y []complex64, n, incX, incY, ix, iy uintptr) (sum complex64) { + for i := 0; i < int(n); i++ { + sum += y[iy] * x[ix] + ix += incX + iy += incY + } + return sum +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyinc_amd64.s new file mode 100644 index 0000000000..c0b84cd81e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyinc_amd64.s @@ -0,0 +1,73 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
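+
+// AxpyInc below is the strided float32 axpy kernel, unrolled four ways
+// with a scalar tail. Mirroring the documentation of its complex64
+// counterpart earlier in this patch, its contract is:
+//
+//	for i := 0; i < int(n); i++ {
+//		y[iy] += alpha * x[ix]
+//		ix += incX
+//		iy += incY
+//	}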
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// func AxpyInc(alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr) +TEXT ·AxpyInc(SB), NOSPLIT, $0 + MOVQ n+56(FP), CX // CX = n + CMPQ CX, $0 // if n==0 { return } + JLE axpyi_end + MOVQ x_base+8(FP), SI // SI = &x + MOVQ y_base+32(FP), DI // DI = &y + MOVQ ix+80(FP), R8 // R8 = ix + MOVQ iy+88(FP), R9 // R9 = iy + LEAQ (SI)(R8*4), SI // SI = &(x[ix]) + LEAQ (DI)(R9*4), DI // DI = &(y[iy]) + MOVQ DI, DX // DX = DI Read Pointer for y + MOVQ incX+64(FP), R8 // R8 = incX + SHLQ $2, R8 // R8 *= sizeof(float32) + MOVQ incY+72(FP), R9 // R9 = incY + SHLQ $2, R9 // R9 *= sizeof(float32) + MOVSS alpha+0(FP), X0 // X0 = alpha + MOVSS X0, X1 // X1 = X0 // for pipelining + MOVQ CX, BX + ANDQ $3, BX // BX = n % 4 + SHRQ $2, CX // CX = floor( n / 4 ) + JZ axpyi_tail_start // if CX == 0 { goto axpyi_tail_start } + +axpyi_loop: // Loop unrolled 4x do { + MOVSS (SI), X2 // X_i = x[i] + MOVSS (SI)(R8*1), X3 + LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2]) + MOVSS (SI), X4 + MOVSS (SI)(R8*1), X5 + MULSS X1, X2 // X_i *= a + MULSS X0, X3 + MULSS X1, X4 + MULSS X0, X5 + ADDSS (DX), X2 // X_i += y[i] + ADDSS (DX)(R9*1), X3 + LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2]) + ADDSS (DX), X4 + ADDSS (DX)(R9*1), X5 + MOVSS X2, (DI) // y[i] = X_i + MOVSS X3, (DI)(R9*1) + LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2]) + MOVSS X4, (DI) + MOVSS X5, (DI)(R9*1) + LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2]) // Increment addresses + LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2]) + LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2]) + LOOP axpyi_loop // } while --CX > 0 + CMPQ BX, $0 // if BX == 0 { return } + JE axpyi_end + +axpyi_tail_start: // Reset loop registers + MOVQ BX, CX // Loop counter: CX = BX + +axpyi_tail: // do { + MOVSS (SI), X2 // X2 = x[i] + MULSS X1, X2 // X2 *= a + ADDSS (DI), X2 // X2 += y[i] + MOVSS X2, (DI) // y[i] = X2 + ADDQ R8, SI // SI = &(SI[incX]) + ADDQ R9, DI // DI = &(DI[incY]) + LOOP axpyi_tail // } while --CX > 0 + +axpyi_end: + RET + diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyincto_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyincto_amd64.s new file mode 100644 index 0000000000..3f1d2b9330 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyincto_amd64.s @@ -0,0 +1,78 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+// +build !noasm,!gccgo,!safe
+
+#include "textflag.h"
+
+// func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
+TEXT ·AxpyIncTo(SB), NOSPLIT, $0
+	MOVQ n+96(FP), CX       // CX = n
+	CMPQ CX, $0             // if n==0 { return }
+	JLE  axpyi_end
+	MOVQ dst_base+0(FP), DI // DI = &dst
+	MOVQ x_base+48(FP), SI  // SI = &x
+	MOVQ y_base+72(FP), DX  // DX = &y
+	MOVQ ix+120(FP), R8     // R8 = ix // Load the first index
+	MOVQ iy+128(FP), R9     // R9 = iy
+	MOVQ idst+32(FP), R10   // R10 = idst
+	LEAQ (SI)(R8*4), SI     // SI = &(x[ix])
+	LEAQ (DX)(R9*4), DX     // DX = &(y[iy])
+	LEAQ (DI)(R10*4), DI    // DI = &(dst[idst])
+	MOVQ incX+104(FP), R8   // R8 = incX
+	SHLQ $2, R8             // R8 *= sizeof(float32)
+	MOVQ incY+112(FP), R9   // R9 = incY
+	SHLQ $2, R9             // R9 *= sizeof(float32)
+	MOVQ incDst+24(FP), R10 // R10 = incDst
+	SHLQ $2, R10            // R10 *= sizeof(float32)
+	MOVSS alpha+40(FP), X0  // X0 = alpha
+	MOVSS X0, X1            // X1 = X0 // for pipelining
+	MOVQ CX, BX
+	ANDQ $3, BX             // BX = n % 4
+	SHRQ $2, CX             // CX = floor( n / 4 )
+	JZ   axpyi_tail_start   // if CX == 0 { goto axpyi_tail_start }
+
+axpyi_loop: // Loop unrolled 4x do {
+	MOVSS (SI), X2 // X_i = x[i]
+	MOVSS (SI)(R8*1), X3
+	LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
+	MOVSS (SI), X4
+	MOVSS (SI)(R8*1), X5
+	MULSS X1, X2 // X_i *= a
+	MULSS X0, X3
+	MULSS X1, X4
+	MULSS X0, X5
+	ADDSS (DX), X2 // X_i += y[i]
+	ADDSS (DX)(R9*1), X3
+	LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
+	ADDSS (DX), X4
+	ADDSS (DX)(R9*1), X5
+	MOVSS X2, (DI) // dst[i] = X_i
+	MOVSS X3, (DI)(R10*1)
+	LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2])
+	MOVSS X4, (DI)
+	MOVSS X5, (DI)(R10*1)
+	LEAQ (SI)(R8*2), SI  // SI = &(SI[incX*2]) // Increment addresses
+	LEAQ (DX)(R9*2), DX  // DX = &(DX[incY*2])
+	LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2])
+	LOOP axpyi_loop // } while --CX > 0
+	CMPQ BX, $0     // if BX == 0 { return }
+	JE   axpyi_end
+
+axpyi_tail_start: // Reset loop registers
+	MOVQ BX, CX // Loop counter: CX = BX
+
+axpyi_tail: // do {
+	MOVSS (SI), X2 // X2 = x[i]
+	MULSS X1, X2   // X2 *= a
+	ADDSS (DX), X2 // X2 += y[i]
+	MOVSS X2, (DI) // dst[i] = X2
+	ADDQ R8, SI    // SI = &(SI[incX])
+	ADDQ R9, DX    // DX = &(DX[incY])
+	ADDQ R10, DI   // DI = &(DI[incDst])
+	LOOP axpyi_tail // } while --CX > 0
+
+axpyi_end:
+	RET
+
diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitary_amd64.s
new file mode 100644
index 0000000000..8e24be8100
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitary_amd64.s
@@ -0,0 +1,97 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
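+
+// AxpyUnitary below peels scalar iterations until y is 16-byte aligned,
+// so the unrolled loop can feed ADDPS from aligned addresses. The
+// XORQ/INCQ/SHRQ sequence computes the peel count; in Go terms (a
+// sketch, using a hypothetical rem for the misalignment in bytes):
+//
+//	peel := ((rem ^ 15) + 1) >> 2 // == (16 - rem) / 4 float32 elements
+//
+// e.g. rem = 4 gives peel = 3 scalar iterations before the 16x-unrolled
+// main loop takes over.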
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// func AxpyUnitary(alpha float32, x, y []float32) +TEXT ·AxpyUnitary(SB), NOSPLIT, $0 + MOVQ x_base+8(FP), SI // SI = &x + MOVQ y_base+32(FP), DI // DI = &y + MOVQ x_len+16(FP), BX // BX = min( len(x), len(y) ) + CMPQ y_len+40(FP), BX + CMOVQLE y_len+40(FP), BX + CMPQ BX, $0 // if BX == 0 { return } + JE axpy_end + MOVSS alpha+0(FP), X0 + SHUFPS $0, X0, X0 // X0 = { a, a, a, a } + XORQ AX, AX // i = 0 + PXOR X2, X2 // 2 NOP instructions (PXOR) to align + PXOR X3, X3 // loop to cache line + MOVQ DI, CX + ANDQ $0xF, CX // Align on 16-byte boundary for ADDPS + JZ axpy_no_trim // if CX == 0 { goto axpy_no_trim } + + XORQ $0xF, CX // CX = 4 - floor( BX % 16 / 4 ) + INCQ CX + SHRQ $2, CX + +axpy_align: // Trim first value(s) in unaligned buffer do { + MOVSS (SI)(AX*4), X2 // X2 = x[i] + MULSS X0, X2 // X2 *= a + ADDSS (DI)(AX*4), X2 // X2 += y[i] + MOVSS X2, (DI)(AX*4) // y[i] = X2 + INCQ AX // i++ + DECQ BX + JZ axpy_end // if --BX == 0 { return } + LOOP axpy_align // } while --CX > 0 + +axpy_no_trim: + MOVUPS X0, X1 // Copy X0 to X1 for pipelining + MOVQ BX, CX + ANDQ $0xF, BX // BX = len % 16 + SHRQ $4, CX // CX = int( len / 16 ) + JZ axpy_tail4_start // if CX == 0 { return } + +axpy_loop: // Loop unrolled 16x do { + MOVUPS (SI)(AX*4), X2 // X2 = x[i:i+4] + MOVUPS 16(SI)(AX*4), X3 + MOVUPS 32(SI)(AX*4), X4 + MOVUPS 48(SI)(AX*4), X5 + MULPS X0, X2 // X2 *= a + MULPS X1, X3 + MULPS X0, X4 + MULPS X1, X5 + ADDPS (DI)(AX*4), X2 // X2 += y[i:i+4] + ADDPS 16(DI)(AX*4), X3 + ADDPS 32(DI)(AX*4), X4 + ADDPS 48(DI)(AX*4), X5 + MOVUPS X2, (DI)(AX*4) // dst[i:i+4] = X2 + MOVUPS X3, 16(DI)(AX*4) + MOVUPS X4, 32(DI)(AX*4) + MOVUPS X5, 48(DI)(AX*4) + ADDQ $16, AX // i += 16 + LOOP axpy_loop // while (--CX) > 0 + CMPQ BX, $0 // if BX == 0 { return } + JE axpy_end + +axpy_tail4_start: // Reset loop counter for 4-wide tail loop + MOVQ BX, CX // CX = floor( BX / 4 ) + SHRQ $2, CX + JZ axpy_tail_start // if CX == 0 { goto axpy_tail_start } + +axpy_tail4: // Loop unrolled 4x do { + MOVUPS (SI)(AX*4), X2 // X2 = x[i] + MULPS X0, X2 // X2 *= a + ADDPS (DI)(AX*4), X2 // X2 += y[i] + MOVUPS X2, (DI)(AX*4) // y[i] = X2 + ADDQ $4, AX // i += 4 + LOOP axpy_tail4 // } while --CX > 0 + +axpy_tail_start: // Reset loop counter for 1-wide tail loop + MOVQ BX, CX // CX = BX % 4 + ANDQ $3, CX + JZ axpy_end // if CX == 0 { return } + +axpy_tail: + MOVSS (SI)(AX*4), X1 // X1 = x[i] + MULSS X0, X1 // X1 *= a + ADDSS (DI)(AX*4), X1 // X1 += y[i] + MOVSS X1, (DI)(AX*4) // y[i] = X1 + INCQ AX // i++ + LOOP axpy_tail // } while --CX > 0 + +axpy_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitaryto_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitaryto_amd64.s new file mode 100644 index 0000000000..9a68f0f491 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitaryto_amd64.s @@ -0,0 +1,98 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// func AxpyUnitaryTo(dst []float32, alpha float32, x, y []float32) +TEXT ·AxpyUnitaryTo(SB), NOSPLIT, $0 + MOVQ dst_base+0(FP), DI // DI = &dst + MOVQ x_base+32(FP), SI // SI = &x + MOVQ y_base+56(FP), DX // DX = &y + MOVQ x_len+40(FP), BX // BX = min( len(x), len(y), len(dst) ) + CMPQ y_len+64(FP), BX + CMOVQLE y_len+64(FP), BX + CMPQ dst_len+8(FP), BX + CMOVQLE dst_len+8(FP), BX + CMPQ BX, $0 // if BX == 0 { return } + JE axpy_end + MOVSS alpha+24(FP), X0 + SHUFPS $0, X0, X0 // X0 = { a, a, a, a, } + XORQ AX, AX // i = 0 + MOVQ DX, CX + ANDQ $0xF, CX // Align on 16-byte boundary for ADDPS + JZ axpy_no_trim // if CX == 0 { goto axpy_no_trim } + + XORQ $0xF, CX // CX = 4 - floor ( B % 16 / 4 ) + INCQ CX + SHRQ $2, CX + +axpy_align: // Trim first value(s) in unaligned buffer do { + MOVSS (SI)(AX*4), X2 // X2 = x[i] + MULSS X0, X2 // X2 *= a + ADDSS (DX)(AX*4), X2 // X2 += y[i] + MOVSS X2, (DI)(AX*4) // y[i] = X2 + INCQ AX // i++ + DECQ BX + JZ axpy_end // if --BX == 0 { return } + LOOP axpy_align // } while --CX > 0 + +axpy_no_trim: + MOVUPS X0, X1 // Copy X0 to X1 for pipelining + MOVQ BX, CX + ANDQ $0xF, BX // BX = len % 16 + SHRQ $4, CX // CX = floor( len / 16 ) + JZ axpy_tail4_start // if CX == 0 { return } + +axpy_loop: // Loop unrolled 16x do { + MOVUPS (SI)(AX*4), X2 // X2 = x[i:i+4] + MOVUPS 16(SI)(AX*4), X3 + MOVUPS 32(SI)(AX*4), X4 + MOVUPS 48(SI)(AX*4), X5 + MULPS X0, X2 // X2 *= a + MULPS X1, X3 + MULPS X0, X4 + MULPS X1, X5 + ADDPS (DX)(AX*4), X2 // X2 += y[i:i+4] + ADDPS 16(DX)(AX*4), X3 + ADDPS 32(DX)(AX*4), X4 + ADDPS 48(DX)(AX*4), X5 + MOVUPS X2, (DI)(AX*4) // dst[i:i+4] = X2 + MOVUPS X3, 16(DI)(AX*4) + MOVUPS X4, 32(DI)(AX*4) + MOVUPS X5, 48(DI)(AX*4) + ADDQ $16, AX // i += 16 + LOOP axpy_loop // while (--CX) > 0 + CMPQ BX, $0 // if BX == 0 { return } + JE axpy_end + +axpy_tail4_start: // Reset loop counter for 4-wide tail loop + MOVQ BX, CX // CX = floor( BX / 4 ) + SHRQ $2, CX + JZ axpy_tail_start // if CX == 0 { goto axpy_tail_start } + +axpy_tail4: // Loop unrolled 4x do { + MOVUPS (SI)(AX*4), X2 // X2 = x[i] + MULPS X0, X2 // X2 *= a + ADDPS (DX)(AX*4), X2 // X2 += y[i] + MOVUPS X2, (DI)(AX*4) // y[i] = X2 + ADDQ $4, AX // i += 4 + LOOP axpy_tail4 // } while --CX > 0 + +axpy_tail_start: // Reset loop counter for 1-wide tail loop + MOVQ BX, CX // CX = BX % 4 + ANDQ $3, CX + JZ axpy_end // if CX == 0 { return } + +axpy_tail: + MOVSS (SI)(AX*4), X1 // X1 = x[i] + MULSS X0, X1 // X1 *= a + ADDSS (DX)(AX*4), X1 // X1 += y[i] + MOVSS X1, (DI)(AX*4) // y[i] = X1 + INCQ AX // i++ + LOOP axpy_tail // } while --CX > 0 + +axpy_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/ddotinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f32/ddotinc_amd64.s new file mode 100644 index 0000000000..85fcd89eed --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/ddotinc_amd64.s @@ -0,0 +1,91 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
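+
+// DdotInc below computes a float32 dot product with float64 accumulation
+// (note the CVTSS2SD conversions and the float64 result slot), the
+// dsdot-style accumulation used by BLAS. A pure-Go sketch of the
+// contract:
+//
+//	var sum float64
+//	for i := 0; i < int(n); i++ {
+//		sum += float64(x[ix]) * float64(y[iy])
+//		ix += incX
+//		iy += incY
+//	}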
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define X_PTR SI +#define Y_PTR DI +#define LEN CX +#define TAIL BX +#define INC_X R8 +#define INCx3_X R10 +#define INC_Y R9 +#define INCx3_Y R11 +#define SUM X0 +#define P_SUM X1 + +// func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) +TEXT ·DdotInc(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y + MOVQ n+48(FP), LEN // LEN = n + PXOR SUM, SUM // SUM = 0 + CMPQ LEN, $0 + JE dot_end + + MOVQ ix+72(FP), INC_X // INC_X = ix + MOVQ iy+80(FP), INC_Y // INC_Y = iy + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix]) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy]) + + MOVQ incX+56(FP), INC_X // INC_X = incX * sizeof(float32) + SHLQ $2, INC_X + MOVQ incY+64(FP), INC_Y // INC_Y = incY * sizeof(float32) + SHLQ $2, INC_Y + + MOVQ LEN, TAIL + ANDQ $3, TAIL // TAIL = LEN % 4 + SHRQ $2, LEN // LEN = floor( LEN / 4 ) + JZ dot_tail // if LEN == 0 { goto dot_tail } + + PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining + LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3 + LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3 + +dot_loop: // Loop unrolled 4x do { + CVTSS2SD (X_PTR), X2 // X_i = x[i:i+1] + CVTSS2SD (X_PTR)(INC_X*1), X3 + CVTSS2SD (X_PTR)(INC_X*2), X4 + CVTSS2SD (X_PTR)(INCx3_X*1), X5 + + CVTSS2SD (Y_PTR), X6 // X_j = y[i:i+1] + CVTSS2SD (Y_PTR)(INC_Y*1), X7 + CVTSS2SD (Y_PTR)(INC_Y*2), X8 + CVTSS2SD (Y_PTR)(INCx3_Y*1), X9 + + MULSD X6, X2 // X_i *= X_j + MULSD X7, X3 + MULSD X8, X4 + MULSD X9, X5 + + ADDSD X2, SUM // SUM += X_i + ADDSD X3, P_SUM + ADDSD X4, SUM + ADDSD X5, P_SUM + + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4]) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4]) + + DECQ LEN + JNZ dot_loop // } while --LEN > 0 + + ADDSD P_SUM, SUM // SUM += P_SUM + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE dot_end + +dot_tail: // do { + CVTSS2SD (X_PTR), X2 // X2 = x[i] + CVTSS2SD (Y_PTR), X3 // X2 *= y[i] + MULSD X3, X2 + ADDSD X2, SUM // SUM += X2 + ADDQ INC_X, X_PTR // X_PTR += INC_X + ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y + DECQ TAIL + JNZ dot_tail // } while --TAIL > 0 + +dot_end: + MOVSD SUM, sum+88(FP) // return SUM + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/ddotunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f32/ddotunitary_amd64.s new file mode 100644 index 0000000000..87ef09fa39 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/ddotunitary_amd64.s @@ -0,0 +1,110 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
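+
+// DdotUnitary below keeps two packed accumulators (SUM and P_SUM) so the
+// ADDPDs of consecutive iterations do not serialize on one register. The
+// final HADDPD (byte-encoded, presumably for old assembler support)
+// reduces the two float64 lanes, so the returned value is
+//
+//	sum = (SUM[0] + P_SUM[0]) + (SUM[1] + P_SUM[1])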
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define HADDPD_SUM_SUM LONG $0xC07C0F66 // @ HADDPD X0, X0 + +#define X_PTR SI +#define Y_PTR DI +#define LEN CX +#define TAIL BX +#define IDX AX +#define SUM X0 +#define P_SUM X1 + +// func DdotUnitary(x, y []float32) (sum float32) +TEXT ·DdotUnitary(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y + MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) ) + CMPQ y_len+32(FP), LEN + CMOVQLE y_len+32(FP), LEN + PXOR SUM, SUM // psum = 0 + CMPQ LEN, $0 + JE dot_end + + XORQ IDX, IDX + MOVQ Y_PTR, DX + ANDQ $0xF, DX // Align on 16-byte boundary for ADDPS + JZ dot_no_trim // if DX == 0 { goto dot_no_trim } + + SUBQ $16, DX + +dot_align: // Trim first value(s) in unaligned buffer do { + CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i]) + CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i]) + MULSD X3, X2 + ADDSD X2, SUM // SUM += X2 + INCQ IDX // IDX++ + DECQ LEN + JZ dot_end // if --TAIL == 0 { return } + ADDQ $4, DX + JNZ dot_align // } while --LEN > 0 + +dot_no_trim: + PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining + MOVQ LEN, TAIL + ANDQ $0x7, TAIL // TAIL = LEN % 8 + SHRQ $3, LEN // LEN = floor( LEN / 8 ) + JZ dot_tail_start // if LEN == 0 { goto dot_tail_start } + +dot_loop: // Loop unrolled 8x do { + CVTPS2PD (X_PTR)(IDX*4), X2 // X_i = x[i:i+1] + CVTPS2PD 8(X_PTR)(IDX*4), X3 + CVTPS2PD 16(X_PTR)(IDX*4), X4 + CVTPS2PD 24(X_PTR)(IDX*4), X5 + + CVTPS2PD (Y_PTR)(IDX*4), X6 // X_j = y[i:i+1] + CVTPS2PD 8(Y_PTR)(IDX*4), X7 + CVTPS2PD 16(Y_PTR)(IDX*4), X8 + CVTPS2PD 24(Y_PTR)(IDX*4), X9 + + MULPD X6, X2 // X_i *= X_j + MULPD X7, X3 + MULPD X8, X4 + MULPD X9, X5 + + ADDPD X2, SUM // SUM += X_i + ADDPD X3, P_SUM + ADDPD X4, SUM + ADDPD X5, P_SUM + + ADDQ $8, IDX // IDX += 8 + DECQ LEN + JNZ dot_loop // } while --LEN > 0 + + ADDPD P_SUM, SUM // SUM += P_SUM + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE dot_end + +dot_tail_start: + MOVQ TAIL, LEN + SHRQ $1, LEN + JZ dot_tail_one + +dot_tail_two: + CVTPS2PD (X_PTR)(IDX*4), X2 // X_i = x[i:i+1] + CVTPS2PD (Y_PTR)(IDX*4), X6 // X_j = y[i:i+1] + MULPD X6, X2 // X_i *= X_j + ADDPD X2, SUM // SUM += X_i + ADDQ $2, IDX // IDX += 2 + DECQ LEN + JNZ dot_tail_two // } while --LEN > 0 + + ANDQ $1, TAIL + JZ dot_end + +dot_tail_one: + CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i]) + CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i]) + MULSD X3, X2 // X2 *= X3 + ADDSD X2, SUM // SUM += X2 + +dot_end: + HADDPD_SUM_SUM // SUM = \sum{ SUM[i] } + MOVSD SUM, sum+48(FP) // return SUM + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/doc.go b/vendor/gonum.org/v1/gonum/internal/asm/f32/doc.go new file mode 100644 index 0000000000..408847a698 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package f32 provides float32 vector primitives. +package f32 // import "gonum.org/v1/gonum/internal/asm/f32" diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/dotinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f32/dotinc_amd64.s new file mode 100644 index 0000000000..9ac8063691 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/dotinc_amd64.s @@ -0,0 +1,85 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
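+
+// DotInc below unrolls the strided dot product four ways. Since x86
+// scaled addressing only supports *1, *2, *4 and *8, the kernel
+// precomputes a 3x stride with LEAQ (INC)(INC*2) and touches the four
+// elements of each iteration as
+//
+//	(PTR), (PTR)(INC*1), (PTR)(INC*2), (PTR)(INCx3*1)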
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define X_PTR SI +#define Y_PTR DI +#define LEN CX +#define TAIL BX +#define INC_X R8 +#define INCx3_X R10 +#define INC_Y R9 +#define INCx3_Y R11 +#define SUM X0 +#define P_SUM X1 + +// func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) +TEXT ·DotInc(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y + PXOR SUM, SUM // SUM = 0 + MOVQ n+48(FP), LEN // LEN = n + CMPQ LEN, $0 + JE dot_end + + MOVQ ix+72(FP), INC_X // INC_X = ix + MOVQ iy+80(FP), INC_Y // INC_Y = iy + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix]) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy]) + + MOVQ incX+56(FP), INC_X // INC_X := incX * sizeof(float32) + SHLQ $2, INC_X + MOVQ incY+64(FP), INC_Y // INC_Y := incY * sizeof(float32) + SHLQ $2, INC_Y + + MOVQ LEN, TAIL + ANDQ $0x3, TAIL // TAIL = LEN % 4 + SHRQ $2, LEN // LEN = floor( LEN / 4 ) + JZ dot_tail // if LEN == 0 { goto dot_tail } + + PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining + LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3 + LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3 + +dot_loop: // Loop unrolled 4x do { + MOVSS (X_PTR), X2 // X_i = x[i:i+1] + MOVSS (X_PTR)(INC_X*1), X3 + MOVSS (X_PTR)(INC_X*2), X4 + MOVSS (X_PTR)(INCx3_X*1), X5 + + MULSS (Y_PTR), X2 // X_i *= y[i:i+1] + MULSS (Y_PTR)(INC_Y*1), X3 + MULSS (Y_PTR)(INC_Y*2), X4 + MULSS (Y_PTR)(INCx3_Y*1), X5 + + ADDSS X2, SUM // SUM += X_i + ADDSS X3, P_SUM + ADDSS X4, SUM + ADDSS X5, P_SUM + + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4]) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4]) + + DECQ LEN + JNZ dot_loop // } while --LEN > 0 + + ADDSS P_SUM, SUM // P_SUM += SUM + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE dot_end + +dot_tail: // do { + MOVSS (X_PTR), X2 // X2 = x[i] + MULSS (Y_PTR), X2 // X2 *= y[i] + ADDSS X2, SUM // SUM += X2 + ADDQ INC_X, X_PTR // X_PTR += INC_X + ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y + DECQ TAIL + JNZ dot_tail // } while --TAIL > 0 + +dot_end: + MOVSS SUM, sum+88(FP) // return SUM + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/dotunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f32/dotunitary_amd64.s new file mode 100644 index 0000000000..0023a6e244 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/dotunitary_amd64.s @@ -0,0 +1,106 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
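+
+// DotUnitary below accumulates four float32 lanes and reduces them with
+// two HADDPS passes at the end: the first pass leaves
+// { s0+s1, s2+s3, s0+s1, s2+s3 }, the second leaves the full sum in
+// every lane, so a single MOVSS returns it.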
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define HADDPS_SUM_SUM LONG $0xC07C0FF2 // @ HADDPS X0, X0 + +#define X_PTR SI +#define Y_PTR DI +#define LEN CX +#define TAIL BX +#define IDX AX +#define SUM X0 +#define P_SUM X1 + +// func DotUnitary(x, y []float32) (sum float32) +TEXT ·DotUnitary(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y + PXOR SUM, SUM // SUM = 0 + MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) ) + CMPQ y_len+32(FP), LEN + CMOVQLE y_len+32(FP), LEN + CMPQ LEN, $0 + JE dot_end + + XORQ IDX, IDX + MOVQ Y_PTR, DX + ANDQ $0xF, DX // Align on 16-byte boundary for MULPS + JZ dot_no_trim // if DX == 0 { goto dot_no_trim } + SUBQ $16, DX + +dot_align: // Trim first value(s) in unaligned buffer do { + MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i] + MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i] + ADDSS X2, SUM // SUM += X2 + INCQ IDX // IDX++ + DECQ LEN + JZ dot_end // if --TAIL == 0 { return } + ADDQ $4, DX + JNZ dot_align // } while --DX > 0 + +dot_no_trim: + PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining + MOVQ LEN, TAIL + ANDQ $0xF, TAIL // TAIL = LEN % 16 + SHRQ $4, LEN // LEN = floor( LEN / 16 ) + JZ dot_tail4_start // if LEN == 0 { goto dot_tail4_start } + +dot_loop: // Loop unrolled 16x do { + MOVUPS (X_PTR)(IDX*4), X2 // X_i = x[i:i+1] + MOVUPS 16(X_PTR)(IDX*4), X3 + MOVUPS 32(X_PTR)(IDX*4), X4 + MOVUPS 48(X_PTR)(IDX*4), X5 + + MULPS (Y_PTR)(IDX*4), X2 // X_i *= y[i:i+1] + MULPS 16(Y_PTR)(IDX*4), X3 + MULPS 32(Y_PTR)(IDX*4), X4 + MULPS 48(Y_PTR)(IDX*4), X5 + + ADDPS X2, SUM // SUM += X_i + ADDPS X3, P_SUM + ADDPS X4, SUM + ADDPS X5, P_SUM + + ADDQ $16, IDX // IDX += 16 + DECQ LEN + JNZ dot_loop // } while --LEN > 0 + + ADDPS P_SUM, SUM // SUM += P_SUM + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE dot_end + +dot_tail4_start: // Reset loop counter for 4-wide tail loop + MOVQ TAIL, LEN // LEN = floor( TAIL / 4 ) + SHRQ $2, LEN + JZ dot_tail_start // if LEN == 0 { goto dot_tail_start } + +dot_tail4_loop: // Loop unrolled 4x do { + MOVUPS (X_PTR)(IDX*4), X2 // X_i = x[i:i+1] + MULPS (Y_PTR)(IDX*4), X2 // X_i *= y[i:i+1] + ADDPS X2, SUM // SUM += X_i + ADDQ $4, IDX // i += 4 + DECQ LEN + JNZ dot_tail4_loop // } while --LEN > 0 + +dot_tail_start: // Reset loop counter for 1-wide tail loop + ANDQ $3, TAIL // TAIL = TAIL % 4 + JZ dot_end // if TAIL == 0 { return } + +dot_tail: // do { + MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i] + MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i] + ADDSS X2, SUM // psum += X2 + INCQ IDX // IDX++ + DECQ TAIL + JNZ dot_tail // } while --TAIL > 0 + +dot_end: + HADDPS_SUM_SUM // SUM = \sum{ SUM[i] } + HADDPS_SUM_SUM + MOVSS SUM, sum+48(FP) // return SUM + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/ge_amd64.go b/vendor/gonum.org/v1/gonum/internal/asm/f32/ge_amd64.go new file mode 100644 index 0000000000..72acba2077 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/ge_amd64.go @@ -0,0 +1,18 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !noasm && !gccgo && !safe +// +build !noasm,!gccgo,!safe + +package f32 + +// Ger performs the rank-one operation +// +// A += alpha * x * yᵀ +// +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. 
+func Ger(m, n uintptr, alpha float32, + x []float32, incX uintptr, + y []float32, incY uintptr, + a []float32, lda uintptr) diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/ge_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f32/ge_amd64.s new file mode 100644 index 0000000000..f8fd3df862 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/ge_amd64.s @@ -0,0 +1,757 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define SIZE 4 +#define BITSIZE 2 +#define KERNELSIZE 3 + +#define M_DIM m+0(FP) +#define M CX +#define N_DIM n+8(FP) +#define N BX + +#define TMP1 R14 +#define TMP2 R15 + +#define X_PTR SI +#define Y y_base+56(FP) +#define Y_PTR DX +#define A_ROW AX +#define A_PTR DI + +#define INC_X R8 +#define INC3_X R9 + +#define INC_Y R10 +#define INC3_Y R11 + +#define LDA R12 +#define LDA3 R13 + +#define ALPHA X0 +#define ALPHA_SPILL al-16(SP) + +#define LOAD_ALPHA \ + MOVSS alpha+16(FP), ALPHA \ + SHUFPS $0, ALPHA, ALPHA + +#define LOAD_SCALED4 \ + PREFETCHNTA 16*SIZE(X_PTR) \ + MOVDDUP (X_PTR), X1 \ + MOVDDUP 2*SIZE(X_PTR), X3 \ + MOVSHDUP X1, X2 \ + MOVSHDUP X3, X4 \ + MOVSLDUP X1, X1 \ + MOVSLDUP X3, X3 \ + MULPS ALPHA, X1 \ + MULPS ALPHA, X2 \ + MULPS ALPHA, X3 \ + MULPS ALPHA, X4 + +#define LOAD_SCALED2 \ + MOVDDUP (X_PTR), X1 \ + MOVSHDUP X1, X2 \ + MOVSLDUP X1, X1 \ + MULPS ALPHA, X1 \ + MULPS ALPHA, X2 + +#define LOAD_SCALED1 \ + MOVSS (X_PTR), X1 \ + SHUFPS $0, X1, X1 \ + MULPS ALPHA, X1 + +#define LOAD_SCALED4_INC \ + PREFETCHNTA (X_PTR)(INC_X*8) \ + MOVSS (X_PTR), X1 \ + MOVSS (X_PTR)(INC_X*1), X2 \ + MOVSS (X_PTR)(INC_X*2), X3 \ + MOVSS (X_PTR)(INC3_X*1), X4 \ + SHUFPS $0, X1, X1 \ + SHUFPS $0, X2, X2 \ + SHUFPS $0, X3, X3 \ + SHUFPS $0, X4, X4 \ + MULPS ALPHA, X1 \ + MULPS ALPHA, X2 \ + MULPS ALPHA, X3 \ + MULPS ALPHA, X4 + +#define LOAD_SCALED2_INC \ + MOVSS (X_PTR), X1 \ + MOVSS (X_PTR)(INC_X*1), X2 \ + SHUFPS $0, X1, X1 \ + SHUFPS $0, X2, X2 \ + MULPS ALPHA, X1 \ + MULPS ALPHA, X2 + +#define KERNEL_LOAD8 \ + MOVUPS (Y_PTR), X5 \ + MOVUPS 4*SIZE(Y_PTR), X6 + +#define KERNEL_LOAD8_INC \ + MOVSS (Y_PTR), X5 \ + MOVSS (Y_PTR)(INC_Y*1), X6 \ + MOVSS (Y_PTR)(INC_Y*2), X7 \ + MOVSS (Y_PTR)(INC3_Y*1), X8 \ + UNPCKLPS X6, X5 \ + UNPCKLPS X8, X7 \ + MOVLHPS X7, X5 \ + LEAQ (Y_PTR)(INC_Y*4), Y_PTR \ + MOVSS (Y_PTR), X6 \ + MOVSS (Y_PTR)(INC_Y*1), X7 \ + MOVSS (Y_PTR)(INC_Y*2), X8 \ + MOVSS (Y_PTR)(INC3_Y*1), X9 \ + UNPCKLPS X7, X6 \ + UNPCKLPS X9, X8 \ + MOVLHPS X8, X6 + +#define KERNEL_LOAD4 \ + MOVUPS (Y_PTR), X5 + +#define KERNEL_LOAD4_INC \ + MOVSS (Y_PTR), X5 \ + MOVSS (Y_PTR)(INC_Y*1), X6 \ + MOVSS (Y_PTR)(INC_Y*2), X7 \ + MOVSS (Y_PTR)(INC3_Y*1), X8 \ + UNPCKLPS X6, X5 \ + UNPCKLPS X8, X7 \ + MOVLHPS X7, X5 + +#define KERNEL_LOAD2 \ + MOVSD (Y_PTR), X5 + +#define KERNEL_LOAD2_INC \ + MOVSS (Y_PTR), X5 \ + MOVSS (Y_PTR)(INC_Y*1), X6 \ + UNPCKLPS X6, X5 + +#define KERNEL_4x8 \ + MOVUPS X5, X7 \ + MOVUPS X6, X8 \ + MOVUPS X5, X9 \ + MOVUPS X6, X10 \ + MOVUPS X5, X11 \ + MOVUPS X6, X12 \ + MULPS X1, X5 \ + MULPS X1, X6 \ + MULPS X2, X7 \ + MULPS X2, X8 \ + MULPS X3, X9 \ + MULPS X3, X10 \ + MULPS X4, X11 \ + MULPS X4, X12 + +#define STORE_4x8 \ + MOVUPS ALPHA, ALPHA_SPILL \ + MOVUPS (A_PTR), X13 \ + ADDPS X13, X5 \ + MOVUPS 4*SIZE(A_PTR), X14 \ + ADDPS X14, X6 \ + MOVUPS (A_PTR)(LDA*1), X15 \ + ADDPS X15, X7 \ + MOVUPS 4*SIZE(A_PTR)(LDA*1), X0 \ + ADDPS X0, X8 \ + MOVUPS (A_PTR)(LDA*2), 
X13 \ + ADDPS X13, X9 \ + MOVUPS 4*SIZE(A_PTR)(LDA*2), X14 \ + ADDPS X14, X10 \ + MOVUPS (A_PTR)(LDA3*1), X15 \ + ADDPS X15, X11 \ + MOVUPS 4*SIZE(A_PTR)(LDA3*1), X0 \ + ADDPS X0, X12 \ + MOVUPS X5, (A_PTR) \ + MOVUPS X6, 4*SIZE(A_PTR) \ + MOVUPS X7, (A_PTR)(LDA*1) \ + MOVUPS X8, 4*SIZE(A_PTR)(LDA*1) \ + MOVUPS X9, (A_PTR)(LDA*2) \ + MOVUPS X10, 4*SIZE(A_PTR)(LDA*2) \ + MOVUPS X11, (A_PTR)(LDA3*1) \ + MOVUPS X12, 4*SIZE(A_PTR)(LDA3*1) \ + MOVUPS ALPHA_SPILL, ALPHA \ + ADDQ $8*SIZE, A_PTR + +#define KERNEL_4x4 \ + MOVUPS X5, X6 \ + MOVUPS X5, X7 \ + MOVUPS X5, X8 \ + MULPS X1, X5 \ + MULPS X2, X6 \ + MULPS X3, X7 \ + MULPS X4, X8 + +#define STORE_4x4 \ + MOVUPS (A_PTR), X13 \ + ADDPS X13, X5 \ + MOVUPS (A_PTR)(LDA*1), X14 \ + ADDPS X14, X6 \ + MOVUPS (A_PTR)(LDA*2), X15 \ + ADDPS X15, X7 \ + MOVUPS (A_PTR)(LDA3*1), X13 \ + ADDPS X13, X8 \ + MOVUPS X5, (A_PTR) \ + MOVUPS X6, (A_PTR)(LDA*1) \ + MOVUPS X7, (A_PTR)(LDA*2) \ + MOVUPS X8, (A_PTR)(LDA3*1) \ + ADDQ $4*SIZE, A_PTR + +#define KERNEL_4x2 \ + MOVUPS X5, X6 \ + MOVUPS X5, X7 \ + MOVUPS X5, X8 \ + MULPS X1, X5 \ + MULPS X2, X6 \ + MULPS X3, X7 \ + MULPS X4, X8 + +#define STORE_4x2 \ + MOVSD (A_PTR), X9 \ + ADDPS X9, X5 \ + MOVSD (A_PTR)(LDA*1), X10 \ + ADDPS X10, X6 \ + MOVSD (A_PTR)(LDA*2), X11 \ + ADDPS X11, X7 \ + MOVSD (A_PTR)(LDA3*1), X12 \ + ADDPS X12, X8 \ + MOVSD X5, (A_PTR) \ + MOVSD X6, (A_PTR)(LDA*1) \ + MOVSD X7, (A_PTR)(LDA*2) \ + MOVSD X8, (A_PTR)(LDA3*1) \ + ADDQ $2*SIZE, A_PTR + +#define KERNEL_4x1 \ + MOVSS (Y_PTR), X5 \ + MOVSS X5, X6 \ + MOVSS X5, X7 \ + MOVSS X5, X8 \ + MULSS X1, X5 \ + MULSS X2, X6 \ + MULSS X3, X7 \ + MULSS X4, X8 + +#define STORE_4x1 \ + ADDSS (A_PTR), X5 \ + ADDSS (A_PTR)(LDA*1), X6 \ + ADDSS (A_PTR)(LDA*2), X7 \ + ADDSS (A_PTR)(LDA3*1), X8 \ + MOVSS X5, (A_PTR) \ + MOVSS X6, (A_PTR)(LDA*1) \ + MOVSS X7, (A_PTR)(LDA*2) \ + MOVSS X8, (A_PTR)(LDA3*1) \ + ADDQ $SIZE, A_PTR + +#define KERNEL_2x8 \ + MOVUPS X5, X7 \ + MOVUPS X6, X8 \ + MULPS X1, X5 \ + MULPS X1, X6 \ + MULPS X2, X7 \ + MULPS X2, X8 + +#define STORE_2x8 \ + MOVUPS (A_PTR), X9 \ + ADDPS X9, X5 \ + MOVUPS 4*SIZE(A_PTR), X10 \ + ADDPS X10, X6 \ + MOVUPS (A_PTR)(LDA*1), X11 \ + ADDPS X11, X7 \ + MOVUPS 4*SIZE(A_PTR)(LDA*1), X12 \ + ADDPS X12, X8 \ + MOVUPS X5, (A_PTR) \ + MOVUPS X6, 4*SIZE(A_PTR) \ + MOVUPS X7, (A_PTR)(LDA*1) \ + MOVUPS X8, 4*SIZE(A_PTR)(LDA*1) \ + ADDQ $8*SIZE, A_PTR + +#define KERNEL_2x4 \ + MOVUPS X5, X6 \ + MULPS X1, X5 \ + MULPS X2, X6 + +#define STORE_2x4 \ + MOVUPS (A_PTR), X9 \ + ADDPS X9, X5 \ + MOVUPS (A_PTR)(LDA*1), X11 \ + ADDPS X11, X6 \ + MOVUPS X5, (A_PTR) \ + MOVUPS X6, (A_PTR)(LDA*1) \ + ADDQ $4*SIZE, A_PTR + +#define KERNEL_2x2 \ + MOVSD X5, X6 \ + MULPS X1, X5 \ + MULPS X2, X6 + +#define STORE_2x2 \ + MOVSD (A_PTR), X7 \ + ADDPS X7, X5 \ + MOVSD (A_PTR)(LDA*1), X8 \ + ADDPS X8, X6 \ + MOVSD X5, (A_PTR) \ + MOVSD X6, (A_PTR)(LDA*1) \ + ADDQ $2*SIZE, A_PTR + +#define KERNEL_2x1 \ + MOVSS (Y_PTR), X5 \ + MOVSS X5, X6 \ + MULSS X1, X5 \ + MULSS X2, X6 + +#define STORE_2x1 \ + ADDSS (A_PTR), X5 \ + ADDSS (A_PTR)(LDA*1), X6 \ + MOVSS X5, (A_PTR) \ + MOVSS X6, (A_PTR)(LDA*1) \ + ADDQ $SIZE, A_PTR + +#define KERNEL_1x8 \ + MULPS X1, X5 \ + MULPS X1, X6 + +#define STORE_1x8 \ + MOVUPS (A_PTR), X7 \ + ADDPS X7, X5 \ + MOVUPS 4*SIZE(A_PTR), X8 \ + ADDPS X8, X6 \ + MOVUPS X5, (A_PTR) \ + MOVUPS X6, 4*SIZE(A_PTR) \ + ADDQ $8*SIZE, A_PTR + +#define KERNEL_1x4 \ + MULPS X1, X5 \ + MULPS X1, X6 + +#define STORE_1x4 \ + MOVUPS (A_PTR), X7 \ + ADDPS X7, X5 \ + MOVUPS X5, (A_PTR) \ + ADDQ $4*SIZE, A_PTR + +#define 
KERNEL_1x2 \ + MULPS X1, X5 + +#define STORE_1x2 \ + MOVSD (A_PTR), X6 \ + ADDPS X6, X5 \ + MOVSD X5, (A_PTR) \ + ADDQ $2*SIZE, A_PTR + +#define KERNEL_1x1 \ + MOVSS (Y_PTR), X5 \ + MULSS X1, X5 + +#define STORE_1x1 \ + ADDSS (A_PTR), X5 \ + MOVSS X5, (A_PTR) \ + ADDQ $SIZE, A_PTR + +// func Ger(m, n uintptr, alpha float32, +// x []float32, incX uintptr, +// y []float32, incY uintptr, +// a []float32, lda uintptr) +TEXT ·Ger(SB), 0, $16-120 + MOVQ M_DIM, M + MOVQ N_DIM, N + CMPQ M, $0 + JE end + CMPQ N, $0 + JE end + + LOAD_ALPHA + + MOVQ x_base+24(FP), X_PTR + MOVQ y_base+56(FP), Y_PTR + MOVQ a_base+88(FP), A_ROW + MOVQ A_ROW, A_PTR + MOVQ lda+112(FP), LDA // LDA = LDA * sizeof(float32) + SHLQ $BITSIZE, LDA + LEAQ (LDA)(LDA*2), LDA3 // LDA3 = LDA * 3 + + CMPQ incY+80(FP), $1 // Check for dense vector Y (fast-path) + JNE inc + CMPQ incX+48(FP), $1 // Check for dense vector X (fast-path) + JNE inc + + SHRQ $2, M + JZ r2 + +r4: + + // LOAD 4 + LOAD_SCALED4 + + MOVQ N_DIM, N + SHRQ $KERNELSIZE, N + JZ r4c4 + +r4c8: + // 4x8 KERNEL + KERNEL_LOAD8 + KERNEL_4x8 + STORE_4x8 + + ADDQ $8*SIZE, Y_PTR + + DECQ N + JNZ r4c8 + +r4c4: + TESTQ $4, N_DIM + JZ r4c2 + + // 4x4 KERNEL + KERNEL_LOAD4 + KERNEL_4x4 + STORE_4x4 + + ADDQ $4*SIZE, Y_PTR + +r4c2: + TESTQ $2, N_DIM + JZ r4c1 + + // 4x2 KERNEL + KERNEL_LOAD2 + KERNEL_4x2 + STORE_4x2 + + ADDQ $2*SIZE, Y_PTR + +r4c1: + TESTQ $1, N_DIM + JZ r4end + + // 4x1 KERNEL + KERNEL_4x1 + STORE_4x1 + + ADDQ $SIZE, Y_PTR + +r4end: + ADDQ $4*SIZE, X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*4), A_ROW + MOVQ A_ROW, A_PTR + + DECQ M + JNZ r4 + +r2: + TESTQ $2, M_DIM + JZ r1 + + // LOAD 2 + LOAD_SCALED2 + + MOVQ N_DIM, N + SHRQ $KERNELSIZE, N + JZ r2c4 + +r2c8: + // 2x8 KERNEL + KERNEL_LOAD8 + KERNEL_2x8 + STORE_2x8 + + ADDQ $8*SIZE, Y_PTR + + DECQ N + JNZ r2c8 + +r2c4: + TESTQ $4, N_DIM + JZ r2c2 + + // 2x4 KERNEL + KERNEL_LOAD4 + KERNEL_2x4 + STORE_2x4 + + ADDQ $4*SIZE, Y_PTR + +r2c2: + TESTQ $2, N_DIM + JZ r2c1 + + // 2x2 KERNEL + KERNEL_LOAD2 + KERNEL_2x2 + STORE_2x2 + + ADDQ $2*SIZE, Y_PTR + +r2c1: + TESTQ $1, N_DIM + JZ r2end + + // 2x1 KERNEL + KERNEL_2x1 + STORE_2x1 + + ADDQ $SIZE, Y_PTR + +r2end: + ADDQ $2*SIZE, X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*2), A_ROW + MOVQ A_ROW, A_PTR + +r1: + TESTQ $1, M_DIM + JZ end + + // LOAD 1 + LOAD_SCALED1 + + MOVQ N_DIM, N + SHRQ $KERNELSIZE, N + JZ r1c4 + +r1c8: + // 1x8 KERNEL + KERNEL_LOAD8 + KERNEL_1x8 + STORE_1x8 + + ADDQ $8*SIZE, Y_PTR + + DECQ N + JNZ r1c8 + +r1c4: + TESTQ $4, N_DIM + JZ r1c2 + + // 1x4 KERNEL + KERNEL_LOAD4 + KERNEL_1x4 + STORE_1x4 + + ADDQ $4*SIZE, Y_PTR + +r1c2: + TESTQ $2, N_DIM + JZ r1c1 + + // 1x2 KERNEL + KERNEL_LOAD2 + KERNEL_1x2 + STORE_1x2 + + ADDQ $2*SIZE, Y_PTR + +r1c1: + TESTQ $1, N_DIM + JZ end + + // 1x1 KERNEL + KERNEL_1x1 + STORE_1x1 + +end: + RET + +inc: // Algorithm for incY != 0 ( split loads in kernel ) + + MOVQ incX+48(FP), INC_X // INC_X = incX * sizeof(float32) + SHLQ $BITSIZE, INC_X + MOVQ incY+80(FP), INC_Y // INC_Y = incY * sizeof(float32) + SHLQ $BITSIZE, INC_Y + LEAQ (INC_X)(INC_X*2), INC3_X // INC3_X = INC_X * 3 + LEAQ (INC_Y)(INC_Y*2), INC3_Y // INC3_Y = INC_Y * 3 + + XORQ TMP2, TMP2 + MOVQ M, TMP1 + SUBQ $1, TMP1 + IMULQ INC_X, TMP1 + NEGQ TMP1 + CMPQ INC_X, $0 + CMOVQLT TMP1, TMP2 + LEAQ (X_PTR)(TMP2*SIZE), X_PTR + + XORQ TMP2, TMP2 + MOVQ N, TMP1 + SUBQ $1, TMP1 + IMULQ INC_Y, TMP1 + NEGQ TMP1 + CMPQ INC_Y, $0 + CMOVQLT TMP1, TMP2 + LEAQ (Y_PTR)(TMP2*SIZE), Y_PTR + + SHRQ $2, M + JZ inc_r2 + +inc_r4: + // LOAD 4 + LOAD_SCALED4_INC + + MOVQ N_DIM, N + SHRQ 
$KERNELSIZE, N + JZ inc_r4c4 + +inc_r4c8: + // 4x4 KERNEL + KERNEL_LOAD8_INC + KERNEL_4x8 + STORE_4x8 + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + DECQ N + JNZ inc_r4c8 + +inc_r4c4: + TESTQ $4, N_DIM + JZ inc_r4c2 + + // 4x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_4x4 + STORE_4x4 + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + +inc_r4c2: + TESTQ $2, N_DIM + JZ inc_r4c1 + + // 4x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_4x2 + STORE_4x2 + + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_r4c1: + TESTQ $1, N_DIM + JZ inc_r4end + + // 4x1 KERNEL + KERNEL_4x1 + STORE_4x1 + + ADDQ INC_Y, Y_PTR + +inc_r4end: + LEAQ (X_PTR)(INC_X*4), X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*4), A_ROW + MOVQ A_ROW, A_PTR + + DECQ M + JNZ inc_r4 + +inc_r2: + TESTQ $2, M_DIM + JZ inc_r1 + + // LOAD 2 + LOAD_SCALED2_INC + + MOVQ N_DIM, N + SHRQ $KERNELSIZE, N + JZ inc_r2c4 + +inc_r2c8: + // 2x8 KERNEL + KERNEL_LOAD8_INC + KERNEL_2x8 + STORE_2x8 + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + DECQ N + JNZ inc_r2c8 + +inc_r2c4: + TESTQ $4, N_DIM + JZ inc_r2c2 + + // 2x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_2x4 + STORE_2x4 + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + +inc_r2c2: + TESTQ $2, N_DIM + JZ inc_r2c1 + + // 2x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_2x2 + STORE_2x2 + + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_r2c1: + TESTQ $1, N_DIM + JZ inc_r2end + + // 2x1 KERNEL + KERNEL_2x1 + STORE_2x1 + + ADDQ INC_Y, Y_PTR + +inc_r2end: + LEAQ (X_PTR)(INC_X*2), X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*2), A_ROW + MOVQ A_ROW, A_PTR + +inc_r1: + TESTQ $1, M_DIM + JZ end + + // LOAD 1 + LOAD_SCALED1 + + MOVQ N_DIM, N + SHRQ $KERNELSIZE, N + JZ inc_r1c4 + +inc_r1c8: + // 1x8 KERNEL + KERNEL_LOAD8_INC + KERNEL_1x8 + STORE_1x8 + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + DECQ N + JNZ inc_r1c8 + +inc_r1c4: + TESTQ $4, N_DIM + JZ inc_r1c2 + + // 1x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_1x4 + STORE_1x4 + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + +inc_r1c2: + TESTQ $2, N_DIM + JZ inc_r1c1 + + // 1x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_1x2 + STORE_1x2 + + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_r1c1: + TESTQ $1, N_DIM + JZ inc_end + + // 1x1 KERNEL + KERNEL_1x1 + STORE_1x1 + +inc_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/ge_noasm.go b/vendor/gonum.org/v1/gonum/internal/asm/f32/ge_noasm.go new file mode 100644 index 0000000000..61ee6f1802 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/ge_noasm.go @@ -0,0 +1,39 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !amd64 || noasm || gccgo || safe +// +build !amd64 noasm gccgo safe + +package f32 + +// Ger performs the rank-one operation +// +// A += alpha * x * yᵀ +// +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. 
+func Ger(m, n uintptr, alpha float32, x []float32, incX uintptr, y []float32, incY uintptr, a []float32, lda uintptr) { + + if incX == 1 && incY == 1 { + x = x[:m] + y = y[:n] + for i, xv := range x { + AxpyUnitary(alpha*xv, y, a[uintptr(i)*lda:uintptr(i)*lda+n]) + } + return + } + + var ky, kx uintptr + if int(incY) < 0 { + ky = uintptr(-int(n-1) * int(incY)) + } + if int(incX) < 0 { + kx = uintptr(-int(m-1) * int(incX)) + } + + ix := kx + for i := 0; i < int(m); i++ { + AxpyInc(alpha*x[ix], y, a[uintptr(i)*lda:uintptr(i)*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0) + ix += incX + } +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/gemv.go b/vendor/gonum.org/v1/gonum/internal/asm/f32/gemv.go new file mode 100644 index 0000000000..a6000504a7 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/gemv.go @@ -0,0 +1,92 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package f32 + +// GemvN computes +// +// y = alpha * A * x + beta * y +// +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. +func GemvN(m, n uintptr, alpha float32, a []float32, lda uintptr, x []float32, incX uintptr, beta float32, y []float32, incY uintptr) { + var kx, ky, i uintptr + if int(incX) < 0 { + kx = uintptr(-int(n-1) * int(incX)) + } + if int(incY) < 0 { + ky = uintptr(-int(m-1) * int(incY)) + } + + if incX == 1 && incY == 1 { + if beta == 0 { + for i = 0; i < m; i++ { + y[i] = alpha * DotUnitary(a[lda*i:lda*i+n], x) + } + return + } + for i = 0; i < m; i++ { + y[i] = y[i]*beta + alpha*DotUnitary(a[lda*i:lda*i+n], x) + } + return + } + iy := ky + if beta == 0 { + for i = 0; i < m; i++ { + y[iy] = alpha * DotInc(x, a[lda*i:lda*i+n], n, incX, 1, kx, 0) + iy += incY + } + return + } + for i = 0; i < m; i++ { + y[iy] = y[iy]*beta + alpha*DotInc(x, a[lda*i:lda*i+n], n, incX, 1, kx, 0) + iy += incY + } +} + +// GemvT computes +// +// y = alpha * Aᵀ * x + beta * y +// +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. +func GemvT(m, n uintptr, alpha float32, a []float32, lda uintptr, x []float32, incX uintptr, beta float32, y []float32, incY uintptr) { + var kx, ky, i uintptr + if int(incX) < 0 { + kx = uintptr(-int(m-1) * int(incX)) + } + if int(incY) < 0 { + ky = uintptr(-int(n-1) * int(incY)) + } + switch { + case beta == 0: // beta == 0 is special-cased to memclear + if incY == 1 { + for i := range y { + y[i] = 0 + } + } else { + iy := ky + for i := 0; i < int(n); i++ { + y[iy] = 0 + iy += incY + } + } + case int(incY) < 0: + ScalInc(beta, y, n, uintptr(int(-incY))) + case incY == 1: + ScalUnitary(beta, y[:n]) + default: + ScalInc(beta, y, n, incY) + } + + if incX == 1 && incY == 1 { + for i = 0; i < m; i++ { + AxpyUnitaryTo(y, alpha*x[i], a[lda*i:lda*i+n], y) + } + return + } + ix := kx + for i = 0; i < m; i++ { + AxpyInc(alpha*x[ix], a[lda*i:lda*i+n], y, n, 1, incY, 0, ky) + ix += incX + } +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/l2norm.go b/vendor/gonum.org/v1/gonum/internal/asm/f32/l2norm.go new file mode 100644 index 0000000000..0f2a77405c --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/l2norm.go @@ -0,0 +1,90 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
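[Editor's note] gemv.go above composes GemvN and GemvT from the dot and axpy kernels. Because asm/f32 is an internal package and cannot be imported from outside the gonum module, the following standalone sketch re-derives the dense (incX == incY == 1) path of GemvN to pin down the row-major a/lda convention; gemvN here is our illustrative helper, not the vendored function.

package main

import "fmt"

// gemvN mirrors the dense fast path of f32.GemvN:
// y = alpha*A*x + beta*y with A stored row-major, row i at a[i*lda:].
func gemvN(m, n int, alpha float32, a []float32, lda int, x []float32, beta float32, y []float32) {
	for i := 0; i < m; i++ {
		row := a[i*lda : i*lda+n]
		var dot float32
		for j, v := range row {
			dot += v * x[j]
		}
		y[i] = beta*y[i] + alpha*dot
	}
}

func main() {
	// 2x3 matrix, lda = 3 (row-major, no padding).
	a := []float32{
		1, 2, 3,
		4, 5, 6,
	}
	x := []float32{1, 1, 1}
	y := []float32{10, 20}
	gemvN(2, 3, 2, a, 3, x, 1, y) // y = 2*A*x + y
	fmt.Println(y)                // [22 50]
}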
+ +package f32 + +import "gonum.org/v1/gonum/internal/math32" + +// L2NormUnitary is the level 2 norm of x. +func L2NormUnitary(x []float32) (sum float32) { + var scale float32 + var sumSquares float32 = 1 + for _, v := range x { + if v == 0 { + continue + } + absxi := math32.Abs(v) + if math32.IsNaN(absxi) { + return math32.NaN() + } + if scale < absxi { + s := scale / absxi + sumSquares = 1 + sumSquares*s*s + scale = absxi + } else { + s := absxi / scale + sumSquares += s * s + } + } + if math32.IsInf(scale, 1) { + return math32.Inf(1) + } + return scale * math32.Sqrt(sumSquares) +} + +// L2NormInc is the level 2 norm of x. +func L2NormInc(x []float32, n, incX uintptr) (sum float32) { + var scale float32 + var sumSquares float32 = 1 + for ix := uintptr(0); ix < n*incX; ix += incX { + val := x[ix] + if val == 0 { + continue + } + absxi := math32.Abs(val) + if math32.IsNaN(absxi) { + return math32.NaN() + } + if scale < absxi { + s := scale / absxi + sumSquares = 1 + sumSquares*s*s + scale = absxi + } else { + s := absxi / scale + sumSquares += s * s + } + } + if math32.IsInf(scale, 1) { + return math32.Inf(1) + } + return scale * math32.Sqrt(sumSquares) +} + +// L2DistanceUnitary is the L2 norm of x-y. +func L2DistanceUnitary(x, y []float32) (sum float32) { + var scale float32 + var sumSquares float32 = 1 + for i, v := range x { + v -= y[i] + if v == 0 { + continue + } + absxi := math32.Abs(v) + if math32.IsNaN(absxi) { + return math32.NaN() + } + if scale < absxi { + s := scale / absxi + sumSquares = 1 + sumSquares*s*s + scale = absxi + } else { + s := absxi / scale + sumSquares += s * s + } + } + if math32.IsInf(scale, 1) { + return math32.Inf(1) + } + return scale * math32.Sqrt(sumSquares) +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/scal.go b/vendor/gonum.org/v1/gonum/internal/asm/f32/scal.go new file mode 100644 index 0000000000..ad2adee652 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/scal.go @@ -0,0 +1,59 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package f32 + +// ScalUnitary is +// +// for i := range x { +// x[i] *= alpha +// } +func ScalUnitary(alpha float32, x []float32) { + for i := range x { + x[i] *= alpha + } +} + +// ScalUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha * v +// } +func ScalUnitaryTo(dst []float32, alpha float32, x []float32) { + for i, v := range x { + dst[i] = alpha * v + } +} + +// ScalInc is +// +// var ix uintptr +// for i := 0; i < int(n); i++ { +// x[ix] *= alpha +// ix += incX +// } +func ScalInc(alpha float32, x []float32, n, incX uintptr) { + var ix uintptr + for i := 0; i < int(n); i++ { + x[ix] *= alpha + ix += incX + } +} + +// ScalIncTo is +// +// var idst, ix uintptr +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha * x[ix] +// ix += incX +// idst += incDst +// } +func ScalIncTo(dst []float32, incDst uintptr, alpha float32, x []float32, n, incX uintptr) { + var idst, ix uintptr + for i := 0; i < int(n); i++ { + dst[idst] = alpha * x[ix] + ix += incX + idst += incDst + } +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_amd64.go b/vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_amd64.go new file mode 100644 index 0000000000..2ea0519743 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_amd64.go @@ -0,0 +1,86 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !noasm && !gccgo && !safe +// +build !noasm,!gccgo,!safe + +package f32 + +// AxpyUnitary is +// +// for i, v := range x { +// y[i] += alpha * v +// } +func AxpyUnitary(alpha float32, x, y []float32) + +// AxpyUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha*v + y[i] +// } +func AxpyUnitaryTo(dst []float32, alpha float32, x, y []float32) + +// AxpyInc is +// +// for i := 0; i < int(n); i++ { +// y[iy] += alpha * x[ix] +// ix += incX +// iy += incY +// } +func AxpyInc(alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr) + +// AxpyIncTo is +// +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha*x[ix] + y[iy] +// ix += incX +// iy += incY +// idst += incDst +// } +func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr) + +// DdotUnitary is +// +// for i, v := range x { +// sum += float64(y[i]) * float64(v) +// } +// return +func DdotUnitary(x, y []float32) (sum float64) + +// DdotInc is +// +// for i := 0; i < int(n); i++ { +// sum += float64(y[iy]) * float64(x[ix]) +// ix += incX +// iy += incY +// } +// return +func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) + +// DotUnitary is +// +// for i, v := range x { +// sum += y[i] * v +// } +// return sum +func DotUnitary(x, y []float32) (sum float32) + +// DotInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * x[ix] +// ix += incX +// iy += incY +// } +// return sum +func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) + +// Sum is +// +// var sum float32 +// for _, v := range x { +// sum += v +// } +// return sum +func Sum(x []float32) float32 diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_noasm.go b/vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_noasm.go new file mode 100644 index 0000000000..07b36ff34b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_noasm.go @@ -0,0 +1,137 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
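[Editor's note] stubs_amd64.go above declares Go signatures whose bodies live in the .s files, guarded by the !noasm && !gccgo && !safe build tags; stubs_noasm.go, next, carries the portable fallbacks. The Ddot variants exist because accumulating float32 products in float64 suppresses rounding drift on long vectors. A self-contained illustration with local re-implementations; dot32 and ddot are our names, not the vendored functions.

package main

import "fmt"

// dot32 accumulates in float32, as f32.DotUnitary does.
func dot32(x, y []float32) (sum float32) {
	for i, v := range x {
		sum += y[i] * v
	}
	return sum
}

// ddot accumulates in float64, as f32.DdotUnitary does.
func ddot(x, y []float32) (sum float64) {
	for i, v := range x {
		sum += float64(y[i]) * float64(v)
	}
	return sum
}

func main() {
	n := 1 << 20
	x := make([]float32, n)
	y := make([]float32, n)
	for i := range x {
		x[i], y[i] = 1e-3, 1e-3
	}
	fmt.Println(dot32(x, y))         // float32 accumulation: typically drifts
	fmt.Println(float32(ddot(x, y))) // float64 accumulation: ~1.048576
}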
+ +//go:build !amd64 || noasm || gccgo || safe +// +build !amd64 noasm gccgo safe + +package f32 + +// AxpyUnitary is +// +// for i, v := range x { +// y[i] += alpha * v +// } +func AxpyUnitary(alpha float32, x, y []float32) { + for i, v := range x { + y[i] += alpha * v + } +} + +// AxpyUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha*v + y[i] +// } +func AxpyUnitaryTo(dst []float32, alpha float32, x, y []float32) { + for i, v := range x { + dst[i] = alpha*v + y[i] + } +} + +// AxpyInc is +// +// for i := 0; i < int(n); i++ { +// y[iy] += alpha * x[ix] +// ix += incX +// iy += incY +// } +func AxpyInc(alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr) { + for i := 0; i < int(n); i++ { + y[iy] += alpha * x[ix] + ix += incX + iy += incY + } +} + +// AxpyIncTo is +// +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha*x[ix] + y[iy] +// ix += incX +// iy += incY +// idst += incDst +// } +func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr) { + for i := 0; i < int(n); i++ { + dst[idst] = alpha*x[ix] + y[iy] + ix += incX + iy += incY + idst += incDst + } +} + +// DotUnitary is +// +// for i, v := range x { +// sum += y[i] * v +// } +// return sum +func DotUnitary(x, y []float32) (sum float32) { + for i, v := range x { + sum += y[i] * v + } + return sum +} + +// DotInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * x[ix] +// ix += incX +// iy += incY +// } +// return sum +func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) { + for i := 0; i < int(n); i++ { + sum += y[iy] * x[ix] + ix += incX + iy += incY + } + return sum +} + +// DdotUnitary is +// +// for i, v := range x { +// sum += float64(y[i]) * float64(v) +// } +// return +func DdotUnitary(x, y []float32) (sum float64) { + for i, v := range x { + sum += float64(y[i]) * float64(v) + } + return +} + +// DdotInc is +// +// for i := 0; i < int(n); i++ { +// sum += float64(y[iy]) * float64(x[ix]) +// ix += incX +// iy += incY +// } +// return +func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) { + for i := 0; i < int(n); i++ { + sum += float64(y[iy]) * float64(x[ix]) + ix += incX + iy += incY + } + return +} + +// Sum is +// +// var sum float32 +// for _, v := range x { +// sum += v +// } +// return sum +func Sum(x []float32) float32 { + var sum float32 + for _, v := range x { + sum += v + } + return sum +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f32/sum_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f32/sum_amd64.s new file mode 100644 index 0000000000..42e96361e4 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/sum_amd64.s @@ -0,0 +1,100 @@ +// Copyright ©2021 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
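[Editor's note] The ix/iy parameters on the Inc variants above implement the BLAS negative-increment convention: with a negative increment a routine walks the vector backwards from its last logical element, which is why ge_noasm.go earlier computes kx = -(m-1)*incX. Since the increments are uintptr, a negative step is passed as its two's-complement value and index arithmetic simply wraps. A sketch; axpyInc is a local copy for illustration.

package main

import "fmt"

// axpyInc mirrors f32.AxpyInc: y[iy] += alpha * x[ix] with strides.
func axpyInc(alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr) {
	for i := 0; i < int(n); i++ {
		y[iy] += alpha * x[ix]
		ix += incX
		iy += incY
	}
}

func main() {
	x := []float32{1, 2, 3}
	y := []float32{0, 0, 0}
	// BLAS incX = -1: start at x's last element and step backwards.
	// uintptr arithmetic wraps, so adding ^uintptr(0) subtracts one.
	incX := ^uintptr(0) // two's-complement -1
	axpyInc(1, x, y, 3, incX, 1, 2, 0)
	fmt.Println(y) // [3 2 1]
}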
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define X_PTR SI +#define IDX AX +#define LEN CX +#define TAIL BX +#define SUM X0 +#define SUM_1 X1 +#define SUM_2 X2 +#define SUM_3 X3 + +// func Sum(x []float32) float32 +TEXT ·Sum(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ x_len+8(FP), LEN // LEN = len(x) + XORQ IDX, IDX // i = 0 + PXOR SUM, SUM // p_sum_i = 0 + CMPQ LEN, $0 // if LEN == 0 { return 0 } + JE sum_end + + PXOR SUM_1, SUM_1 + PXOR SUM_2, SUM_2 + PXOR SUM_3, SUM_3 + + MOVQ X_PTR, TAIL // Check memory alignment + ANDQ $15, TAIL // TAIL = &x % 16 + JZ no_trim // if TAIL == 0 { goto no_trim } + SUBQ $16, TAIL // TAIL -= 16 + +sum_align: // Align on 16-byte boundary do { + ADDSS (X_PTR)(IDX*4), SUM // SUM += x[0] + INCQ IDX // i++ + DECQ LEN // LEN-- + JZ sum_end // if LEN == 0 { return } + ADDQ $4, TAIL // TAIL += 4 + JNZ sum_align // } while TAIL < 0 + +no_trim: + MOVQ LEN, TAIL + SHRQ $4, LEN // LEN = floor( n / 16 ) + JZ sum_tail8 // if LEN == 0 { goto sum_tail8 } + + +sum_loop: // sum 16x wide do { + ADDPS (X_PTR)(IDX*4), SUM // sum_i += x[i:i+4] + ADDPS 16(X_PTR)(IDX*4), SUM_1 + ADDPS 32(X_PTR)(IDX*4), SUM_2 + ADDPS 48(X_PTR)(IDX*4), SUM_3 + + ADDQ $16, IDX // i += 16 + DECQ LEN + JNZ sum_loop // } while --LEN > 0 + +sum_tail8: + ADDPS SUM_3, SUM + ADDPS SUM_2, SUM_1 + + TESTQ $8, TAIL + JZ sum_tail4 + + ADDPS (X_PTR)(IDX*4), SUM // sum_i += x[i:i+4] + ADDPS 16(X_PTR)(IDX*4), SUM_1 + ADDQ $8, IDX + +sum_tail4: + ADDPS SUM_1, SUM + + TESTQ $4, TAIL + JZ sum_tail2 + + ADDPS (X_PTR)(IDX*4), SUM // sum_i += x[i:i+4] + ADDQ $4, IDX + +sum_tail2: + HADDPS SUM, SUM // sum_i[:2] += sum_i[2:4] + + TESTQ $2, TAIL + JZ sum_tail1 + + MOVSD (X_PTR)(IDX*4), SUM_1 // reuse SUM_1 + ADDPS SUM_1, SUM // sum_i += x[i:i+2] + ADDQ $2, IDX + +sum_tail1: + HADDPS SUM, SUM // sum_i[0] += sum_i[1] + + TESTQ $1, TAIL + JZ sum_end + + ADDSS (X_PTR)(IDX*4), SUM + +sum_end: // return sum + MOVSS SUM, ret+24(FP) + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/abssum_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/abssum_amd64.s new file mode 100644 index 0000000000..df63dc0905 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/abssum_amd64.s @@ -0,0 +1,82 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
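[Editor's note] The Sum kernel above keeps four independent partial sums (SUM through SUM_3) so the ADDPS dependency chains do not serialize on a single register; the partials are folded together only in the tail. The same structure in portable Go; sum4 is an illustrative helper, not the vendored fallback.

package main

import "fmt"

// sum4 accumulates into four independent partial sums, mirroring the
// SUM..SUM_3 registers in sum_amd64.s, then folds them at the end.
func sum4(x []float32) float32 {
	var s0, s1, s2, s3 float32
	i := 0
	for ; i+4 <= len(x); i += 4 {
		s0 += x[i]
		s1 += x[i+1]
		s2 += x[i+2]
		s3 += x[i+3]
	}
	for ; i < len(x); i++ { // scalar tail, as in sum_tail1
		s0 += x[i]
	}
	return (s0 + s2) + (s1 + s3)
}

func main() {
	fmt.Println(sum4([]float32{1, 2, 3, 4, 5})) // 15
}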
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// func L1Norm(x []float64) float64 +TEXT ·L1Norm(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), SI // SI = &x + MOVQ x_len+8(FP), CX // CX = len(x) + XORQ AX, AX // i = 0 + PXOR X0, X0 // p_sum_i = 0 + PXOR X1, X1 + PXOR X2, X2 + PXOR X3, X3 + PXOR X4, X4 + PXOR X5, X5 + PXOR X6, X6 + PXOR X7, X7 + CMPQ CX, $0 // if CX == 0 { return 0 } + JE absum_end + MOVQ CX, BX + ANDQ $7, BX // BX = len(x) % 8 + SHRQ $3, CX // CX = floor( len(x) / 8 ) + JZ absum_tail_start // if CX == 0 { goto absum_tail_start } + +absum_loop: // do { + // p_sum += max( p_sum + x[i], p_sum - x[i] ) + MOVUPS (SI)(AX*8), X8 // X_i = x[i:i+1] + MOVUPS 16(SI)(AX*8), X9 + MOVUPS 32(SI)(AX*8), X10 + MOVUPS 48(SI)(AX*8), X11 + ADDPD X8, X0 // p_sum_i += X_i ( positive values ) + ADDPD X9, X2 + ADDPD X10, X4 + ADDPD X11, X6 + SUBPD X8, X1 // p_sum_(i+1) -= X_i ( negative values ) + SUBPD X9, X3 + SUBPD X10, X5 + SUBPD X11, X7 + MAXPD X1, X0 // p_sum_i = max( p_sum_i, p_sum_(i+1) ) + MAXPD X3, X2 + MAXPD X5, X4 + MAXPD X7, X6 + MOVAPS X0, X1 // p_sum_(i+1) = p_sum_i + MOVAPS X2, X3 + MOVAPS X4, X5 + MOVAPS X6, X7 + ADDQ $8, AX // i += 8 + LOOP absum_loop // } while --CX > 0 + + // p_sum_0 = \sum_{i=1}^{3}( p_sum_(i*2) ) + ADDPD X3, X0 + ADDPD X5, X7 + ADDPD X7, X0 + + // p_sum_0[0] = p_sum_0[0] + p_sum_0[1] + MOVAPS X0, X1 + SHUFPD $0x3, X0, X0 // lower( p_sum_0 ) = upper( p_sum_0 ) + ADDSD X1, X0 + CMPQ BX, $0 + JE absum_end // if BX == 0 { goto absum_end } + +absum_tail_start: // Reset loop registers + MOVQ BX, CX // Loop counter: CX = BX + XORPS X8, X8 // X_8 = 0 + +absum_tail: // do { + // p_sum += max( p_sum + x[i], p_sum - x[i] ) + MOVSD (SI)(AX*8), X8 // X_8 = x[i] + MOVSD X0, X1 // p_sum_1 = p_sum_0 + ADDSD X8, X0 // p_sum_0 += X_8 + SUBSD X8, X1 // p_sum_1 -= X_8 + MAXSD X1, X0 // p_sum_0 = max( p_sum_0, p_sum_1 ) + INCQ AX // i++ + LOOP absum_tail // } while --CX > 0 + +absum_end: // return p_sum_0 + MOVSD X0, sum+24(FP) + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/abssuminc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/abssuminc_amd64.s new file mode 100644 index 0000000000..647517333c --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/abssuminc_amd64.s @@ -0,0 +1,90 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
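[Editor's note] L1Norm above avoids an explicit absolute value: for each block it forms both p_sum + x and p_sum - x and keeps the larger with MAXPD, which equals p_sum + |x| because max(p+v, p-v) = p + max(v, -v) = p + |v|. The trick in scalar Go; l1Norm is a local sketch.

package main

import "fmt"

// l1Norm mirrors the branch-free trick in abssum_amd64.s:
// max(p+v, p-v) == p + |v|, so no explicit Abs call is needed.
func l1Norm(x []float64) float64 {
	var p float64
	for _, v := range x {
		plus, minus := p+v, p-v
		if minus > plus {
			plus = minus
		}
		p = plus
	}
	return p
}

func main() {
	fmt.Println(l1Norm([]float64{1, -2, 3, -4})) // 10
}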
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// func L1NormInc(x []float64, n, incX int) (sum float64) +TEXT ·L1NormInc(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), SI // SI = &x + MOVQ n+24(FP), CX // CX = n + MOVQ incX+32(FP), AX // AX = increment * sizeof( float64 ) + SHLQ $3, AX + MOVQ AX, DX // DX = AX * 3 + IMULQ $3, DX + PXOR X0, X0 // p_sum_i = 0 + PXOR X1, X1 + PXOR X2, X2 + PXOR X3, X3 + PXOR X4, X4 + PXOR X5, X5 + PXOR X6, X6 + PXOR X7, X7 + CMPQ CX, $0 // if CX == 0 { return 0 } + JE absum_end + MOVQ CX, BX + ANDQ $7, BX // BX = n % 8 + SHRQ $3, CX // CX = floor( n / 8 ) + JZ absum_tail_start // if CX == 0 { goto absum_tail_start } + +absum_loop: // do { + // p_sum = max( p_sum + x[i], p_sum - x[i] ) + MOVSD (SI), X8 // X_i[0] = x[i] + MOVSD (SI)(AX*1), X9 + MOVSD (SI)(AX*2), X10 + MOVSD (SI)(DX*1), X11 + LEAQ (SI)(AX*4), SI // SI = SI + 4 + MOVHPD (SI), X8 // X_i[1] = x[i+4] + MOVHPD (SI)(AX*1), X9 + MOVHPD (SI)(AX*2), X10 + MOVHPD (SI)(DX*1), X11 + ADDPD X8, X0 // p_sum_i += X_i ( positive values ) + ADDPD X9, X2 + ADDPD X10, X4 + ADDPD X11, X6 + SUBPD X8, X1 // p_sum_(i+1) -= X_i ( negative values ) + SUBPD X9, X3 + SUBPD X10, X5 + SUBPD X11, X7 + MAXPD X1, X0 // p_sum_i = max( p_sum_i, p_sum_(i+1) ) + MAXPD X3, X2 + MAXPD X5, X4 + MAXPD X7, X6 + MOVAPS X0, X1 // p_sum_(i+1) = p_sum_i + MOVAPS X2, X3 + MOVAPS X4, X5 + MOVAPS X6, X7 + LEAQ (SI)(AX*4), SI // SI = SI + 4 + LOOP absum_loop // } while --CX > 0 + + // p_sum_0 = \sum_{i=1}^{3}( p_sum_(i*2) ) + ADDPD X3, X0 + ADDPD X5, X7 + ADDPD X7, X0 + + // p_sum_0[0] = p_sum_0[0] + p_sum_0[1] + MOVAPS X0, X1 + SHUFPD $0x3, X0, X0 // lower( p_sum_0 ) = upper( p_sum_0 ) + ADDSD X1, X0 + CMPQ BX, $0 + JE absum_end // if BX == 0 { goto absum_end } + +absum_tail_start: // Reset loop registers + MOVQ BX, CX // Loop counter: CX = BX + XORPS X8, X8 // X_8 = 0 + +absum_tail: // do { + // p_sum += max( p_sum + x[i], p_sum - x[i] ) + MOVSD (SI), X8 // X_8 = x[i] + MOVSD X0, X1 // p_sum_1 = p_sum_0 + ADDSD X8, X0 // p_sum_0 += X_8 + SUBSD X8, X1 // p_sum_1 -= X_8 + MAXSD X1, X0 // p_sum_0 = max( p_sum_0, p_sum_1 ) + ADDQ AX, SI // i++ + LOOP absum_tail // } while --CX > 0 + +absum_end: // return p_sum_0 + MOVSD X0, sum+40(FP) + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/add_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/add_amd64.s new file mode 100644 index 0000000000..e377f51256 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/add_amd64.s @@ -0,0 +1,66 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// func Add(dst, s []float64) +TEXT ·Add(SB), NOSPLIT, $0 + MOVQ dst_base+0(FP), DI // DI = &dst + MOVQ dst_len+8(FP), CX // CX = len(dst) + MOVQ s_base+24(FP), SI // SI = &s + CMPQ s_len+32(FP), CX // CX = max( CX, len(s) ) + CMOVQLE s_len+32(FP), CX + CMPQ CX, $0 // if CX == 0 { return } + JE add_end + XORQ AX, AX + MOVQ DI, BX + ANDQ $0x0F, BX // BX = &dst & 15 + JZ add_no_trim // if BX == 0 { goto add_no_trim } + + // Align on 16-bit boundary + MOVSD (SI)(AX*8), X0 // X0 = s[i] + ADDSD (DI)(AX*8), X0 // X0 += dst[i] + MOVSD X0, (DI)(AX*8) // dst[i] = X0 + INCQ AX // i++ + DECQ CX // --CX + JE add_end // if CX == 0 { return } + +add_no_trim: + MOVQ CX, BX + ANDQ $7, BX // BX = len(dst) % 8 + SHRQ $3, CX // CX = floor( len(dst) / 8 ) + JZ add_tail_start // if CX == 0 { goto add_tail_start } + +add_loop: // Loop unrolled 8x do { + MOVUPS (SI)(AX*8), X0 // X_i = s[i:i+1] + MOVUPS 16(SI)(AX*8), X1 + MOVUPS 32(SI)(AX*8), X2 + MOVUPS 48(SI)(AX*8), X3 + ADDPD (DI)(AX*8), X0 // X_i += dst[i:i+1] + ADDPD 16(DI)(AX*8), X1 + ADDPD 32(DI)(AX*8), X2 + ADDPD 48(DI)(AX*8), X3 + MOVUPS X0, (DI)(AX*8) // dst[i:i+1] = X_i + MOVUPS X1, 16(DI)(AX*8) + MOVUPS X2, 32(DI)(AX*8) + MOVUPS X3, 48(DI)(AX*8) + ADDQ $8, AX // i += 8 + LOOP add_loop // } while --CX > 0 + CMPQ BX, $0 // if BX == 0 { return } + JE add_end + +add_tail_start: // Reset loop registers + MOVQ BX, CX // Loop counter: CX = BX + +add_tail: // do { + MOVSD (SI)(AX*8), X0 // X0 = s[i] + ADDSD (DI)(AX*8), X0 // X0 += dst[i] + MOVSD X0, (DI)(AX*8) // dst[i] = X0 + INCQ AX // ++i + LOOP add_tail // } while --CX > 0 + +add_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/addconst_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/addconst_amd64.s new file mode 100644 index 0000000000..6f52a8f64f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/addconst_amd64.s @@ -0,0 +1,53 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
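[Editor's note] Add above clamps the iteration count to the shorter slice (the CMPQ/CMOVQLE pair), peels at most one scalar iteration to reach 16-byte alignment of dst, then runs an unrolled body with a scalar tail. The same control shape in Go, minus the alignment peel, which portable code cannot exploit; add is a local sketch.

package main

import "fmt"

// add mirrors f64.Add's semantics: dst[i] += s[i] over the common
// prefix, with an unrolled body and a scalar tail as in add_amd64.s.
func add(dst, s []float64) {
	n := len(dst)
	if len(s) < n {
		n = len(s) // CMOVQLE: iterate over min(len(dst), len(s))
	}
	i := 0
	for ; i+4 <= n; i += 4 { // unrolled main loop
		dst[i] += s[i]
		dst[i+1] += s[i+1]
		dst[i+2] += s[i+2]
		dst[i+3] += s[i+3]
	}
	for ; i < n; i++ { // tail
		dst[i] += s[i]
	}
}

func main() {
	d := []float64{1, 2, 3, 4, 5}
	add(d, []float64{10, 10, 10})
	fmt.Println(d) // [11 12 13 4 5]
}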
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// func Addconst(alpha float64, x []float64) +TEXT ·AddConst(SB), NOSPLIT, $0 + MOVQ x_base+8(FP), SI // SI = &x + MOVQ x_len+16(FP), CX // CX = len(x) + CMPQ CX, $0 // if len(x) == 0 { return } + JE ac_end + MOVSD alpha+0(FP), X4 // X4 = { a, a } + SHUFPD $0, X4, X4 + MOVUPS X4, X5 // X5 = X4 + XORQ AX, AX // i = 0 + MOVQ CX, BX + ANDQ $7, BX // BX = len(x) % 8 + SHRQ $3, CX // CX = floor( len(x) / 8 ) + JZ ac_tail_start // if CX == 0 { goto ac_tail_start } + +ac_loop: // Loop unrolled 8x do { + MOVUPS (SI)(AX*8), X0 // X_i = s[i:i+1] + MOVUPS 16(SI)(AX*8), X1 + MOVUPS 32(SI)(AX*8), X2 + MOVUPS 48(SI)(AX*8), X3 + ADDPD X4, X0 // X_i += a + ADDPD X5, X1 + ADDPD X4, X2 + ADDPD X5, X3 + MOVUPS X0, (SI)(AX*8) // s[i:i+1] = X_i + MOVUPS X1, 16(SI)(AX*8) + MOVUPS X2, 32(SI)(AX*8) + MOVUPS X3, 48(SI)(AX*8) + ADDQ $8, AX // i += 8 + LOOP ac_loop // } while --CX > 0 + CMPQ BX, $0 // if BX == 0 { return } + JE ac_end + +ac_tail_start: // Reset loop counters + MOVQ BX, CX // Loop counter: CX = BX + +ac_tail: // do { + MOVSD (SI)(AX*8), X0 // X0 = s[i] + ADDSD X4, X0 // X0 += a + MOVSD X0, (SI)(AX*8) // s[i] = X0 + INCQ AX // ++i + LOOP ac_tail // } while --CX > 0 + +ac_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/axpy.go b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpy.go new file mode 100644 index 0000000000..2ab8129a54 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpy.go @@ -0,0 +1,62 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !amd64 || noasm || gccgo || safe +// +build !amd64 noasm gccgo safe + +package f64 + +// AxpyUnitary is +// +// for i, v := range x { +// y[i] += alpha * v +// } +func AxpyUnitary(alpha float64, x, y []float64) { + for i, v := range x { + y[i] += alpha * v + } +} + +// AxpyUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha*v + y[i] +// } +func AxpyUnitaryTo(dst []float64, alpha float64, x, y []float64) { + for i, v := range x { + dst[i] = alpha*v + y[i] + } +} + +// AxpyInc is +// +// for i := 0; i < int(n); i++ { +// y[iy] += alpha * x[ix] +// ix += incX +// iy += incY +// } +func AxpyInc(alpha float64, x, y []float64, n, incX, incY, ix, iy uintptr) { + for i := 0; i < int(n); i++ { + y[iy] += alpha * x[ix] + ix += incX + iy += incY + } +} + +// AxpyIncTo is +// +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha*x[ix] + y[iy] +// ix += incX +// iy += incY +// idst += incDst +// } +func AxpyIncTo(dst []float64, incDst, idst uintptr, alpha float64, x, y []float64, n, incX, incY, ix, iy uintptr) { + for i := 0; i < int(n); i++ { + dst[idst] = alpha*x[ix] + y[iy] + ix += incX + iy += incY + idst += incDst + } +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyinc_amd64.s new file mode 100644 index 0000000000..a4e180fbfa --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyinc_amd64.s @@ -0,0 +1,142 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// Some of the loop unrolling code is copied from: +// http://golang.org/src/math/big/arith_amd64.s +// which is distributed under these terms: +// +// Copyright (c) 2012 The Go Authors. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define X_PTR SI +#define Y_PTR DI +#define DST_PTR DI +#define IDX AX +#define LEN CX +#define TAIL BX +#define INC_X R8 +#define INCx3_X R11 +#define INC_Y R9 +#define INCx3_Y R12 +#define INC_DST R9 +#define INCx3_DST R12 +#define ALPHA X0 +#define ALPHA_2 X1 + +// func AxpyInc(alpha float64, x, y []float64, n, incX, incY, ix, iy uintptr) +TEXT ·AxpyInc(SB), NOSPLIT, $0 + MOVQ x_base+8(FP), X_PTR // X_PTR = &x + MOVQ y_base+32(FP), Y_PTR // Y_PTR = &y + MOVQ n+56(FP), LEN // LEN = n + CMPQ LEN, $0 // if LEN == 0 { return } + JE end + + MOVQ ix+80(FP), INC_X + MOVQ iy+88(FP), INC_Y + LEAQ (X_PTR)(INC_X*8), X_PTR // X_PTR = &(x[ix]) + LEAQ (Y_PTR)(INC_Y*8), Y_PTR // Y_PTR = &(y[iy]) + MOVQ Y_PTR, DST_PTR // DST_PTR = Y_PTR // Write pointer + + MOVQ incX+64(FP), INC_X // INC_X = incX * sizeof(float64) + SHLQ $3, INC_X + MOVQ incY+72(FP), INC_Y // INC_Y = incY * sizeof(float64) + SHLQ $3, INC_Y + + MOVSD alpha+0(FP), ALPHA // ALPHA = alpha + MOVQ LEN, TAIL + ANDQ $3, TAIL // TAIL = n % 4 + SHRQ $2, LEN // LEN = floor( n / 4 ) + JZ tail_start // if LEN == 0 { goto tail_start } + + MOVAPS ALPHA, ALPHA_2 // ALPHA_2 = ALPHA for pipelining + LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3 + LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3 + +loop: // do { // y[i] += alpha * x[i] unrolled 4x. 
+ MOVSD (X_PTR), X2 // X_i = x[i] + MOVSD (X_PTR)(INC_X*1), X3 + MOVSD (X_PTR)(INC_X*2), X4 + MOVSD (X_PTR)(INCx3_X*1), X5 + + MULSD ALPHA, X2 // X_i *= a + MULSD ALPHA_2, X3 + MULSD ALPHA, X4 + MULSD ALPHA_2, X5 + + ADDSD (Y_PTR), X2 // X_i += y[i] + ADDSD (Y_PTR)(INC_Y*1), X3 + ADDSD (Y_PTR)(INC_Y*2), X4 + ADDSD (Y_PTR)(INCx3_Y*1), X5 + + MOVSD X2, (DST_PTR) // y[i] = X_i + MOVSD X3, (DST_PTR)(INC_DST*1) + MOVSD X4, (DST_PTR)(INC_DST*2) + MOVSD X5, (DST_PTR)(INCx3_DST*1) + + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4]) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4]) + DECQ LEN + JNZ loop // } while --LEN > 0 + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE end + +tail_start: // Reset Loop registers + MOVQ TAIL, LEN // Loop counter: LEN = TAIL + SHRQ $1, LEN // LEN = floor( LEN / 2 ) + JZ tail_one + +tail_two: + MOVSD (X_PTR), X2 // X_i = x[i] + MOVSD (X_PTR)(INC_X*1), X3 + MULSD ALPHA, X2 // X_i *= a + MULSD ALPHA, X3 + ADDSD (Y_PTR), X2 // X_i += y[i] + ADDSD (Y_PTR)(INC_Y*1), X3 + MOVSD X2, (DST_PTR) // y[i] = X_i + MOVSD X3, (DST_PTR)(INC_DST*1) + + LEAQ (X_PTR)(INC_X*2), X_PTR // X_PTR = &(X_PTR[incX*2]) + LEAQ (Y_PTR)(INC_Y*2), Y_PTR // Y_PTR = &(Y_PTR[incY*2]) + + ANDQ $1, TAIL + JZ end // if TAIL == 0 { goto end } + +tail_one: + // y[i] += alpha * x[i] for the last n % 4 iterations. + MOVSD (X_PTR), X2 // X2 = x[i] + MULSD ALPHA, X2 // X2 *= a + ADDSD (Y_PTR), X2 // X2 += y[i] + MOVSD X2, (DST_PTR) // y[i] = X2 + +end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyincto_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyincto_amd64.s new file mode 100644 index 0000000000..0f54a39400 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyincto_amd64.s @@ -0,0 +1,148 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// Some of the loop unrolling code is copied from: +// http://golang.org/src/math/big/arith_amd64.s +// which is distributed under these terms: +// +// Copyright (c) 2012 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define X_PTR SI +#define Y_PTR DI +#define DST_PTR DX +#define IDX AX +#define LEN CX +#define TAIL BX +#define INC_X R8 +#define INCx3_X R11 +#define INC_Y R9 +#define INCx3_Y R12 +#define INC_DST R10 +#define INCx3_DST R13 +#define ALPHA X0 +#define ALPHA_2 X1 + +// func AxpyIncTo(dst []float64, incDst, idst uintptr, alpha float64, x, y []float64, n, incX, incY, ix, iy uintptr) +TEXT ·AxpyIncTo(SB), NOSPLIT, $0 + MOVQ dst_base+0(FP), DST_PTR // DST_PTR := &dst + MOVQ x_base+48(FP), X_PTR // X_PTR := &x + MOVQ y_base+72(FP), Y_PTR // Y_PTR := &y + MOVQ n+96(FP), LEN // LEN := n + CMPQ LEN, $0 // if LEN == 0 { return } + JE end + + MOVQ ix+120(FP), INC_X + LEAQ (X_PTR)(INC_X*8), X_PTR // X_PTR = &(x[ix]) + MOVQ iy+128(FP), INC_Y + LEAQ (Y_PTR)(INC_Y*8), Y_PTR // Y_PTR = &(dst[idst]) + MOVQ idst+32(FP), INC_DST + LEAQ (DST_PTR)(INC_DST*8), DST_PTR // DST_PTR = &(y[iy]) + + MOVQ incX+104(FP), INC_X // INC_X = incX * sizeof(float64) + SHLQ $3, INC_X + MOVQ incY+112(FP), INC_Y // INC_Y = incY * sizeof(float64) + SHLQ $3, INC_Y + MOVQ incDst+24(FP), INC_DST // INC_DST = incDst * sizeof(float64) + SHLQ $3, INC_DST + MOVSD alpha+40(FP), ALPHA + + MOVQ LEN, TAIL + ANDQ $3, TAIL // TAIL = n % 4 + SHRQ $2, LEN // LEN = floor( n / 4 ) + JZ tail_start // if LEN == 0 { goto tail_start } + + MOVSD ALPHA, ALPHA_2 // ALPHA_2 = ALPHA for pipelining + LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3 + LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3 + LEAQ (INC_DST)(INC_DST*2), INCx3_DST // INCx3_DST = INC_DST * 3 + +loop: // do { // y[i] += alpha * x[i] unrolled 2x. 
+ MOVSD (X_PTR), X2 // X_i = x[i] + MOVSD (X_PTR)(INC_X*1), X3 + MOVSD (X_PTR)(INC_X*2), X4 + MOVSD (X_PTR)(INCx3_X*1), X5 + + MULSD ALPHA, X2 // X_i *= a + MULSD ALPHA_2, X3 + MULSD ALPHA, X4 + MULSD ALPHA_2, X5 + + ADDSD (Y_PTR), X2 // X_i += y[i] + ADDSD (Y_PTR)(INC_Y*1), X3 + ADDSD (Y_PTR)(INC_Y*2), X4 + ADDSD (Y_PTR)(INCx3_Y*1), X5 + + MOVSD X2, (DST_PTR) // y[i] = X_i + MOVSD X3, (DST_PTR)(INC_DST*1) + MOVSD X4, (DST_PTR)(INC_DST*2) + MOVSD X5, (DST_PTR)(INCx3_DST*1) + + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4]) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4]) + LEAQ (DST_PTR)(INC_DST*4), DST_PTR // DST_PTR = &(DST_PTR[incDst*4] + DECQ LEN + JNZ loop // } while --LEN > 0 + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE end + +tail_start: // Reset Loop registers + MOVQ TAIL, LEN // Loop counter: LEN = TAIL + SHRQ $1, LEN // LEN = floor( LEN / 2 ) + JZ tail_one + +tail_two: + MOVSD (X_PTR), X2 // X_i = x[i] + MOVSD (X_PTR)(INC_X*1), X3 + MULSD ALPHA, X2 // X_i *= a + MULSD ALPHA, X3 + ADDSD (Y_PTR), X2 // X_i += y[i] + ADDSD (Y_PTR)(INC_Y*1), X3 + MOVSD X2, (DST_PTR) // y[i] = X_i + MOVSD X3, (DST_PTR)(INC_DST*1) + + LEAQ (X_PTR)(INC_X*2), X_PTR // X_PTR = &(X_PTR[incX*2]) + LEAQ (Y_PTR)(INC_Y*2), Y_PTR // Y_PTR = &(Y_PTR[incY*2]) + LEAQ (DST_PTR)(INC_DST*2), DST_PTR // DST_PTR = &(DST_PTR[incY*2] + + ANDQ $1, TAIL + JZ end // if TAIL == 0 { goto end } + +tail_one: + MOVSD (X_PTR), X2 // X2 = x[i] + MULSD ALPHA, X2 // X2 *= a + ADDSD (Y_PTR), X2 // X2 += y[i] + MOVSD X2, (DST_PTR) // y[i] = X2 + +end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitary_amd64.s new file mode 100644 index 0000000000..f0b78596b6 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitary_amd64.s @@ -0,0 +1,134 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// Some of the loop unrolling code is copied from: +// http://golang.org/src/math/big/arith_amd64.s +// which is distributed under these terms: +// +// Copyright (c) 2012 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define X_PTR SI +#define Y_PTR DI +#define DST_PTR DI +#define IDX AX +#define LEN CX +#define TAIL BX +#define ALPHA X0 +#define ALPHA_2 X1 + +// func AxpyUnitary(alpha float64, x, y []float64) +TEXT ·AxpyUnitary(SB), NOSPLIT, $0 + MOVQ x_base+8(FP), X_PTR // X_PTR := &x + MOVQ y_base+32(FP), Y_PTR // Y_PTR := &y + MOVQ x_len+16(FP), LEN // LEN = min( len(x), len(y) ) + CMPQ y_len+40(FP), LEN + CMOVQLE y_len+40(FP), LEN + CMPQ LEN, $0 // if LEN == 0 { return } + JE end + XORQ IDX, IDX + MOVSD alpha+0(FP), ALPHA // ALPHA := { alpha, alpha } + SHUFPD $0, ALPHA, ALPHA + MOVUPS ALPHA, ALPHA_2 // ALPHA_2 := ALPHA for pipelining + MOVQ Y_PTR, TAIL // Check memory alignment + ANDQ $15, TAIL // TAIL = &y % 16 + JZ no_trim // if TAIL == 0 { goto no_trim } + + // Align on 16-byte boundary + MOVSD (X_PTR), X2 // X2 := x[0] + MULSD ALPHA, X2 // X2 *= a + ADDSD (Y_PTR), X2 // X2 += y[0] + MOVSD X2, (DST_PTR) // y[0] = X2 + INCQ IDX // i++ + DECQ LEN // LEN-- + JZ end // if LEN == 0 { return } + +no_trim: + MOVQ LEN, TAIL + ANDQ $7, TAIL // TAIL := n % 8 + SHRQ $3, LEN // LEN = floor( n / 8 ) + JZ tail_start // if LEN == 0 { goto tail2_start } + +loop: // do { + // y[i] += alpha * x[i] unrolled 8x. + MOVUPS (X_PTR)(IDX*8), X2 // X_i = x[i] + MOVUPS 16(X_PTR)(IDX*8), X3 + MOVUPS 32(X_PTR)(IDX*8), X4 + MOVUPS 48(X_PTR)(IDX*8), X5 + + MULPD ALPHA, X2 // X_i *= a + MULPD ALPHA_2, X3 + MULPD ALPHA, X4 + MULPD ALPHA_2, X5 + + ADDPD (Y_PTR)(IDX*8), X2 // X_i += y[i] + ADDPD 16(Y_PTR)(IDX*8), X3 + ADDPD 32(Y_PTR)(IDX*8), X4 + ADDPD 48(Y_PTR)(IDX*8), X5 + + MOVUPS X2, (DST_PTR)(IDX*8) // y[i] = X_i + MOVUPS X3, 16(DST_PTR)(IDX*8) + MOVUPS X4, 32(DST_PTR)(IDX*8) + MOVUPS X5, 48(DST_PTR)(IDX*8) + + ADDQ $8, IDX // i += 8 + DECQ LEN + JNZ loop // } while --LEN > 0 + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE end + +tail_start: // Reset loop registers + MOVQ TAIL, LEN // Loop counter: LEN = TAIL + SHRQ $1, LEN // LEN = floor( TAIL / 2 ) + JZ tail_one // if TAIL == 0 { goto tail } + +tail_two: // do { + MOVUPS (X_PTR)(IDX*8), X2 // X2 = x[i] + MULPD ALPHA, X2 // X2 *= a + ADDPD (Y_PTR)(IDX*8), X2 // X2 += y[i] + MOVUPS X2, (DST_PTR)(IDX*8) // y[i] = X2 + ADDQ $2, IDX // i += 2 + DECQ LEN + JNZ tail_two // } while --LEN > 0 + + ANDQ $1, TAIL + JZ end // if TAIL == 0 { goto end } + +tail_one: + MOVSD (X_PTR)(IDX*8), X2 // X2 = x[i] + MULSD ALPHA, X2 // X2 *= a + ADDSD (Y_PTR)(IDX*8), X2 // X2 += y[i] + MOVSD X2, (DST_PTR)(IDX*8) // y[i] = X2 + +end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitaryto_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitaryto_amd64.s new file mode 100644 index 0000000000..dbb0a7eaba --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitaryto_amd64.s @@ -0,0 +1,140 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
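[Editor's note] AxpyUnitary above is the classic BLAS axpy, y <- alpha*x + y, applied over the common prefix of x and y (note the CMOVQLE length clamp at entry). Its scalar meaning, as a local sketch; axpyUnitary is our name.

package main

import "fmt"

// axpyUnitary mirrors f64.AxpyUnitary: y[i] += alpha * x[i] over
// min(len(x), len(y)) elements, as the length clamp enforces.
func axpyUnitary(alpha float64, x, y []float64) {
	n := len(x)
	if len(y) < n {
		n = len(y)
	}
	for i := 0; i < n; i++ {
		y[i] += alpha * x[i]
	}
}

func main() {
	x := []float64{1, 2, 3}
	y := []float64{10, 20, 30}
	axpyUnitary(0.5, x, y)
	fmt.Println(y) // [10.5 21 31.5]
}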
+// +// Some of the loop unrolling code is copied from: +// http://golang.org/src/math/big/arith_amd64.s +// which is distributed under these terms: +// +// Copyright (c) 2012 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define X_PTR SI +#define Y_PTR DX +#define DST_PTR DI +#define IDX AX +#define LEN CX +#define TAIL BX +#define ALPHA X0 +#define ALPHA_2 X1 + +// func AxpyUnitaryTo(dst []float64, alpha float64, x, y []float64) +TEXT ·AxpyUnitaryTo(SB), NOSPLIT, $0 + MOVQ dst_base+0(FP), DST_PTR // DST_PTR := &dst + MOVQ x_base+32(FP), X_PTR // X_PTR := &x + MOVQ y_base+56(FP), Y_PTR // Y_PTR := &y + MOVQ x_len+40(FP), LEN // LEN = min( len(x), len(y), len(dst) ) + CMPQ y_len+64(FP), LEN + CMOVQLE y_len+64(FP), LEN + CMPQ dst_len+8(FP), LEN + CMOVQLE dst_len+8(FP), LEN + + CMPQ LEN, $0 + JE end // if LEN == 0 { return } + + XORQ IDX, IDX // IDX = 0 + MOVSD alpha+24(FP), ALPHA + SHUFPD $0, ALPHA, ALPHA // ALPHA := { alpha, alpha } + MOVQ Y_PTR, TAIL // Check memory alignment + ANDQ $15, TAIL // TAIL = &y % 16 + JZ no_trim // if TAIL == 0 { goto no_trim } + + // Align on 16-byte boundary + MOVSD (X_PTR), X2 // X2 := x[0] + MULSD ALPHA, X2 // X2 *= a + ADDSD (Y_PTR), X2 // X2 += y[0] + MOVSD X2, (DST_PTR) // y[0] = X2 + INCQ IDX // i++ + DECQ LEN // LEN-- + JZ end // if LEN == 0 { return } + +no_trim: + MOVQ LEN, TAIL + ANDQ $7, TAIL // TAIL := n % 8 + SHRQ $3, LEN // LEN = floor( n / 8 ) + JZ tail_start // if LEN == 0 { goto tail_start } + + MOVUPS ALPHA, ALPHA_2 // ALPHA_2 := ALPHA for pipelining + +loop: // do { + // y[i] += alpha * x[i] unrolled 8x. 
+ MOVUPS (X_PTR)(IDX*8), X2 // X_i = x[i] + MOVUPS 16(X_PTR)(IDX*8), X3 + MOVUPS 32(X_PTR)(IDX*8), X4 + MOVUPS 48(X_PTR)(IDX*8), X5 + + MULPD ALPHA, X2 // X_i *= alpha + MULPD ALPHA_2, X3 + MULPD ALPHA, X4 + MULPD ALPHA_2, X5 + + ADDPD (Y_PTR)(IDX*8), X2 // X_i += y[i] + ADDPD 16(Y_PTR)(IDX*8), X3 + ADDPD 32(Y_PTR)(IDX*8), X4 + ADDPD 48(Y_PTR)(IDX*8), X5 + + MOVUPS X2, (DST_PTR)(IDX*8) // y[i] = X_i + MOVUPS X3, 16(DST_PTR)(IDX*8) + MOVUPS X4, 32(DST_PTR)(IDX*8) + MOVUPS X5, 48(DST_PTR)(IDX*8) + + ADDQ $8, IDX // i += 8 + DECQ LEN + JNZ loop // } while --LEN > 0 + CMPQ TAIL, $0 // if TAIL == 0 { return } + JE end + +tail_start: // Reset loop registers + MOVQ TAIL, LEN // Loop counter: LEN = TAIL + SHRQ $1, LEN // LEN = floor( TAIL / 2 ) + JZ tail_one // if LEN == 0 { goto tail } + +tail_two: // do { + MOVUPS (X_PTR)(IDX*8), X2 // X2 = x[i] + MULPD ALPHA, X2 // X2 *= alpha + ADDPD (Y_PTR)(IDX*8), X2 // X2 += y[i] + MOVUPS X2, (DST_PTR)(IDX*8) // y[i] = X2 + ADDQ $2, IDX // i += 2 + DECQ LEN + JNZ tail_two // } while --LEN > 0 + + ANDQ $1, TAIL + JZ end // if TAIL == 0 { goto end } + +tail_one: + MOVSD (X_PTR)(IDX*8), X2 // X2 = x[i] + MULSD ALPHA, X2 // X2 *= a + ADDSD (Y_PTR)(IDX*8), X2 // X2 += y[i] + MOVSD X2, (DST_PTR)(IDX*8) // y[i] = X2 + +end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/cumprod_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/cumprod_amd64.s new file mode 100644 index 0000000000..58168482d8 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/cumprod_amd64.s @@ -0,0 +1,71 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +TEXT ·CumProd(SB), NOSPLIT, $0 + MOVQ dst_base+0(FP), DI // DI = &dst + MOVQ dst_len+8(FP), CX // CX = len(dst) + MOVQ s_base+24(FP), SI // SI = &s + CMPQ s_len+32(FP), CX // CX = max( CX, len(s) ) + CMOVQLE s_len+32(FP), CX + MOVQ CX, ret_len+56(FP) // len(ret) = CX + CMPQ CX, $0 // if CX == 0 { return } + JE cp_end + XORQ AX, AX // i = 0 + + MOVSD (SI), X5 // p_prod = { s[0], s[0] } + SHUFPD $0, X5, X5 + MOVSD X5, (DI) // dst[0] = s[0] + INCQ AX // ++i + DECQ CX // -- CX + JZ cp_end // if CX == 0 { return } + + MOVQ CX, BX + ANDQ $3, BX // BX = CX % 4 + SHRQ $2, CX // CX = floor( CX / 4 ) + JZ cp_tail_start // if CX == 0 { goto cp_tail_start } + +cp_loop: // Loop unrolled 4x do { + MOVUPS (SI)(AX*8), X0 // X0 = s[i:i+1] + MOVUPS 16(SI)(AX*8), X2 + MOVAPS X0, X1 // X1 = X0 + MOVAPS X2, X3 + SHUFPD $1, X1, X1 // { X1[0], X1[1] } = { X1[1], X1[0] } + SHUFPD $1, X3, X3 + MULPD X0, X1 // X1 *= X0 + MULPD X2, X3 + SHUFPD $2, X1, X0 // { X0[0], X0[1] } = { X0[0], X1[1] } + SHUFPD $3, X1, X1 // { X1[0], X1[1] } = { X1[1], X1[1] } + SHUFPD $2, X3, X2 + SHUFPD $3, X3, X3 + MULPD X5, X0 // X0 *= p_prod + MULPD X1, X5 // p_prod *= X1 + MULPD X5, X2 + MOVUPS X0, (DI)(AX*8) // dst[i] = X0 + MOVUPS X2, 16(DI)(AX*8) + MULPD X3, X5 + ADDQ $4, AX // i += 4 + LOOP cp_loop // } while --CX > 0 + + // if BX == 0 { return } + CMPQ BX, $0 + JE cp_end + +cp_tail_start: // Reset loop registers + MOVQ BX, CX // Loop counter: CX = BX + +cp_tail: // do { + MULSD (SI)(AX*8), X5 // p_prod *= s[i] + MOVSD X5, (DI)(AX*8) // dst[i] = p_prod + INCQ AX // ++i + LOOP cp_tail // } while --CX > 0 + +cp_end: + MOVQ DI, ret_base+48(FP) // &ret = &dst + MOVQ dst_cap+16(FP), SI // cap(ret) = cap(dst) + MOVQ SI, ret_cap+64(FP) + RET diff --git 
a/vendor/gonum.org/v1/gonum/internal/asm/f64/cumsum_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/cumsum_amd64.s
new file mode 100644
index 0000000000..85613202a4
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/cumsum_amd64.s
@@ -0,0 +1,64 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !noasm,!gccgo,!safe
+
+#include "textflag.h"
+
+TEXT ·CumSum(SB), NOSPLIT, $0
+ MOVQ dst_base+0(FP), DI // DI = &dst
+ MOVQ dst_len+8(FP), CX // CX = len(dst)
+ MOVQ s_base+24(FP), SI // SI = &s
+ CMPQ s_len+32(FP), CX // CX = max( CX, len(s) )
+ CMOVQLE s_len+32(FP), CX
+ MOVQ CX, ret_len+56(FP) // len(ret) = CX
+ CMPQ CX, $0 // if CX == 0 { return }
+ JE cs_end
+ XORQ AX, AX // i = 0
+ PXOR X5, X5 // p_sum = 0
+ MOVQ CX, BX
+ ANDQ $3, BX // BX = CX % 4
+ SHRQ $2, CX // CX = floor( CX / 4 )
+ JZ cs_tail_start // if CX == 0 { goto cs_tail_start }
+
+cs_loop: // Loop unrolled 4x do {
+ MOVUPS (SI)(AX*8), X0 // X0 = s[i:i+1]
+ MOVUPS 16(SI)(AX*8), X2
+ MOVAPS X0, X1 // X1 = X0
+ MOVAPS X2, X3
+ SHUFPD $1, X1, X1 // { X1[0], X1[1] } = { X1[1], X1[0] }
+ SHUFPD $1, X3, X3
+ ADDPD X0, X1 // X1 += X0
+ ADDPD X2, X3
+ SHUFPD $2, X1, X0 // { X0[0], X0[1] } = { X0[0], X1[1] }
+ SHUFPD $3, X1, X1 // { X1[0], X1[1] } = { X1[1], X1[1] }
+ SHUFPD $2, X3, X2
+ SHUFPD $3, X3, X3
+ ADDPD X5, X0 // X0 += p_sum
+ ADDPD X1, X5 // p_sum += X1
+ ADDPD X5, X2
+ MOVUPS X0, (DI)(AX*8) // dst[i] = X0
+ MOVUPS X2, 16(DI)(AX*8)
+ ADDPD X3, X5
+ ADDQ $4, AX // i += 4
+ LOOP cs_loop // } while --CX > 0
+
+ // if BX == 0 { return }
+ CMPQ BX, $0
+ JE cs_end
+
+cs_tail_start: // Reset loop registers
+ MOVQ BX, CX // Loop counter: CX = BX
+
+cs_tail: // do {
+ ADDSD (SI)(AX*8), X5 // p_sum += s[i]
+ MOVSD X5, (DI)(AX*8) // dst[i] = p_sum
+ INCQ AX // ++i
+ LOOP cs_tail // } while --CX > 0
+
+cs_end:
+ MOVQ DI, ret_base+48(FP) // &ret = &dst
+ MOVQ dst_cap+16(FP), SI // cap(ret) = cap(dst)
+ MOVQ SI, ret_cap+64(FP)
+ RET
diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/div_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/div_amd64.s
new file mode 100644
index 0000000000..9583976748
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/div_amd64.s
@@ -0,0 +1,67 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
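+//
+// For review: the kernel below is the hand-unrolled SSE form of the
+// reference loop
+//
+//	for i, v := range s {
+//		dst[i] /= v
+//	}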
+
+// +build !noasm,!gccgo,!safe
+
+#include "textflag.h"
+
+// func Div(dst, s []float64)
+TEXT ·Div(SB), NOSPLIT, $0
+ MOVQ dst_base+0(FP), DI // DI = &dst
+ MOVQ dst_len+8(FP), CX // CX = len(dst)
+ MOVQ s_base+24(FP), SI // SI = &s
+ CMPQ s_len+32(FP), CX // CX = max( CX, len(s) )
+ CMOVQLE s_len+32(FP), CX
+ CMPQ CX, $0 // if CX == 0 { return }
+ JE div_end
+ XORQ AX, AX // i = 0
+ MOVQ SI, BX
+ ANDQ $15, BX // BX = &s & 15
+ JZ div_no_trim // if BX == 0 { goto div_no_trim }
+
+ // Align on 16-byte boundary
+ MOVSD (DI)(AX*8), X0 // X0 = dst[i]
+ DIVSD (SI)(AX*8), X0 // X0 /= s[i]
+ MOVSD X0, (DI)(AX*8) // dst[i] = X0
+ INCQ AX // ++i
+ DECQ CX // --CX
+ JZ div_end // if CX == 0 { return }
+
+div_no_trim:
+ MOVQ CX, BX
+ ANDQ $7, BX // BX = len(dst) % 8
+ SHRQ $3, CX // CX = floor( len(dst) / 8 )
+ JZ div_tail_start // if CX == 0 { goto div_tail_start }
+
+div_loop: // Loop unrolled 8x do {
+ MOVUPS (DI)(AX*8), X0 // X0 = dst[i:i+1]
+ MOVUPS 16(DI)(AX*8), X1
+ MOVUPS 32(DI)(AX*8), X2
+ MOVUPS 48(DI)(AX*8), X3
+ DIVPD (SI)(AX*8), X0 // X0 /= s[i:i+1]
+ DIVPD 16(SI)(AX*8), X1
+ DIVPD 32(SI)(AX*8), X2
+ DIVPD 48(SI)(AX*8), X3
+ MOVUPS X0, (DI)(AX*8) // dst[i] = X0
+ MOVUPS X1, 16(DI)(AX*8)
+ MOVUPS X2, 32(DI)(AX*8)
+ MOVUPS X3, 48(DI)(AX*8)
+ ADDQ $8, AX // i += 8
+ LOOP div_loop // } while --CX > 0
+ CMPQ BX, $0 // if BX == 0 { return }
+ JE div_end
+
+div_tail_start: // Reset loop registers
+ MOVQ BX, CX // Loop counter: CX = BX
+
+div_tail: // do {
+ MOVSD (DI)(AX*8), X0 // X0 = dst[i]
+ DIVSD (SI)(AX*8), X0 // X0 /= s[i]
+ MOVSD X0, (DI)(AX*8) // dst[i] = X0
+ INCQ AX // ++i
+ LOOP div_tail // } while --CX > 0
+
+div_end:
+ RET
+
diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/divto_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/divto_amd64.s
new file mode 100644
index 0000000000..e7094cb95b
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/divto_amd64.s
@@ -0,0 +1,73 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
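+//
+// For review: the kernel below is the hand-unrolled SSE form of the
+// reference loop
+//
+//	for i, v := range x {
+//		dst[i] = v / y[i]
+//	}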
+
+// +build !noasm,!gccgo,!safe
+
+#include "textflag.h"
+
+// func DivTo(dst, x, y []float64)
+TEXT ·DivTo(SB), NOSPLIT, $0
+ MOVQ dst_base+0(FP), DI // DI = &dst
+ MOVQ dst_len+8(FP), CX // CX = len(dst)
+ MOVQ x_base+24(FP), SI // SI = &x
+ MOVQ y_base+48(FP), DX // DX = &y
+ CMPQ x_len+32(FP), CX // CX = max( len(dst), len(x), len(y) )
+ CMOVQLE x_len+32(FP), CX
+ CMPQ y_len+56(FP), CX
+ CMOVQLE y_len+56(FP), CX
+ MOVQ CX, ret_len+80(FP) // len(ret) = CX
+ CMPQ CX, $0 // if CX == 0 { return }
+ JE div_end
+ XORQ AX, AX // i = 0
+ MOVQ DX, BX
+ ANDQ $15, BX // BX = &y & 0xF
+ JZ div_no_trim // if BX == 0 { goto div_no_trim }
+
+ // Align on 16-byte boundary
+ MOVSD (SI)(AX*8), X0 // X0 = x[i]
+ DIVSD (DX)(AX*8), X0 // X0 /= y[i]
+ MOVSD X0, (DI)(AX*8) // dst[i] = X0
+ INCQ AX // ++i
+ DECQ CX // --CX
+ JZ div_end // if CX == 0 { return }
+
+div_no_trim:
+ MOVQ CX, BX
+ ANDQ $7, BX // BX = len(dst) % 8
+ SHRQ $3, CX // CX = floor( len(dst) / 8 )
+ JZ div_tail_start // if CX == 0 { goto div_tail_start }
+
+div_loop: // Loop unrolled 8x do {
+ MOVUPS (SI)(AX*8), X0 // X0 = x[i:i+1]
+ MOVUPS 16(SI)(AX*8), X1
+ MOVUPS 32(SI)(AX*8), X2
+ MOVUPS 48(SI)(AX*8), X3
+ DIVPD (DX)(AX*8), X0 // X0 /= y[i:i+1]
+ DIVPD 16(DX)(AX*8), X1
+ DIVPD 32(DX)(AX*8), X2
+ DIVPD 48(DX)(AX*8), X3
+ MOVUPS X0, (DI)(AX*8) // dst[i:i+1] = X0
+ MOVUPS X1, 16(DI)(AX*8)
+ MOVUPS X2, 32(DI)(AX*8)
+ MOVUPS X3, 48(DI)(AX*8)
+ ADDQ $8, AX // i += 8
+ LOOP div_loop // } while --CX > 0
+ CMPQ BX, $0 // if BX == 0 { return }
+ JE div_end
+
+div_tail_start: // Reset loop registers
+ MOVQ BX, CX // Loop counter: CX = BX
+
+div_tail: // do {
+ MOVSD (SI)(AX*8), X0 // X0 = x[i]
+ DIVSD (DX)(AX*8), X0 // X0 /= y[i]
+ MOVSD X0, (DI)(AX*8)
+ INCQ AX // ++i
+ LOOP div_tail // } while --CX > 0
+
+div_end:
+ MOVQ DI, ret_base+72(FP) // &ret = &dst
+ MOVQ dst_cap+16(FP), DI // cap(ret) = cap(dst)
+ MOVQ DI, ret_cap+88(FP)
+ RET
diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/doc.go b/vendor/gonum.org/v1/gonum/internal/asm/f64/doc.go
new file mode 100644
index 0000000000..33c76c1e03
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/doc.go
@@ -0,0 +1,6 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package f64 provides float64 vector primitives.
+package f64 // import "gonum.org/v1/gonum/internal/asm/f64"
diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/dot.go b/vendor/gonum.org/v1/gonum/internal/asm/f64/dot.go
new file mode 100644
index 0000000000..099316440e
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/dot.go
@@ -0,0 +1,38 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
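+//
+// This file is the pure-Go fallback selected when the amd64 assembly
+// in dot_amd64.s is excluded by the build tags below.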
+ +//go:build !amd64 || noasm || gccgo || safe +// +build !amd64 noasm gccgo safe + +package f64 + +// DotUnitary is +// +// for i, v := range x { +// sum += y[i] * v +// } +// return sum +func DotUnitary(x, y []float64) (sum float64) { + for i, v := range x { + sum += y[i] * v + } + return sum +} + +// DotInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * x[ix] +// ix += incX +// iy += incY +// } +// return sum +func DotInc(x, y []float64, n, incX, incY, ix, iy uintptr) (sum float64) { + for i := 0; i < int(n); i++ { + sum += y[iy] * x[ix] + ix += incX + iy += incY + } + return sum +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/dot_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/dot_amd64.s new file mode 100644 index 0000000000..c8cd412962 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/dot_amd64.s @@ -0,0 +1,145 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// Some of the loop unrolling code is copied from: +// http://golang.org/src/math/big/arith_amd64.s +// which is distributed under these terms: +// +// Copyright (c) 2012 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// func DdotUnitary(x, y []float64) (sum float64) +// This function assumes len(y) >= len(x). +TEXT ·DotUnitary(SB), NOSPLIT, $0 + MOVQ x+0(FP), R8 + MOVQ x_len+8(FP), DI // n = len(x) + MOVQ y+24(FP), R9 + + MOVSD $(0.0), X7 // sum = 0 + MOVSD $(0.0), X8 // sum = 0 + + MOVQ $0, SI // i = 0 + SUBQ $4, DI // n -= 4 + JL tail_uni // if n < 0 goto tail_uni + +loop_uni: + // sum += x[i] * y[i] unrolled 4x. 
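+ // Two independent accumulators (X7 and X8) keep the ADDPD
+ // dependency chains apart; end_uni folds them into a single
+ // scalar sum.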
+ MOVUPD 0(R8)(SI*8), X0 + MOVUPD 0(R9)(SI*8), X1 + MOVUPD 16(R8)(SI*8), X2 + MOVUPD 16(R9)(SI*8), X3 + MULPD X1, X0 + MULPD X3, X2 + ADDPD X0, X7 + ADDPD X2, X8 + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE loop_uni // if n >= 0 goto loop_uni + +tail_uni: + ADDQ $4, DI // n += 4 + JLE end_uni // if n <= 0 goto end_uni + +onemore_uni: + // sum += x[i] * y[i] for the remaining 1-3 elements. + MOVSD 0(R8)(SI*8), X0 + MOVSD 0(R9)(SI*8), X1 + MULSD X1, X0 + ADDSD X0, X7 + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JNZ onemore_uni // if n != 0 goto onemore_uni + +end_uni: + // Add the four sums together. + ADDPD X8, X7 + MOVSD X7, X0 + UNPCKHPD X7, X7 + ADDSD X0, X7 + MOVSD X7, sum+48(FP) // Return final sum. + RET + +// func DdotInc(x, y []float64, n, incX, incY, ix, iy uintptr) (sum float64) +TEXT ·DotInc(SB), NOSPLIT, $0 + MOVQ x+0(FP), R8 + MOVQ y+24(FP), R9 + MOVQ n+48(FP), CX + MOVQ incX+56(FP), R11 + MOVQ incY+64(FP), R12 + MOVQ ix+72(FP), R13 + MOVQ iy+80(FP), R14 + + MOVSD $(0.0), X7 // sum = 0 + LEAQ (R8)(R13*8), SI // p = &x[ix] + LEAQ (R9)(R14*8), DI // q = &y[ix] + SHLQ $3, R11 // incX *= sizeof(float64) + SHLQ $3, R12 // indY *= sizeof(float64) + + SUBQ $2, CX // n -= 2 + JL tail_inc // if n < 0 goto tail_inc + +loop_inc: + // sum += *p * *q unrolled 2x. + MOVHPD (SI), X0 + MOVHPD (DI), X1 + ADDQ R11, SI // p += incX + ADDQ R12, DI // q += incY + MOVLPD (SI), X0 + MOVLPD (DI), X1 + ADDQ R11, SI // p += incX + ADDQ R12, DI // q += incY + + MULPD X1, X0 + ADDPD X0, X7 + + SUBQ $2, CX // n -= 2 + JGE loop_inc // if n >= 0 goto loop_inc + +tail_inc: + ADDQ $2, CX // n += 2 + JLE end_inc // if n <= 0 goto end_inc + + // sum += *p * *q for the last iteration if n is odd. + MOVSD (SI), X0 + MULSD (DI), X0 + ADDSD X0, X7 + +end_inc: + // Add the two sums together. + MOVSD X7, X0 + UNPCKHPD X7, X7 + ADDSD X0, X7 + MOVSD X7, sum+88(FP) // Return final sum. + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/ge_amd64.go b/vendor/gonum.org/v1/gonum/internal/asm/f64/ge_amd64.go new file mode 100644 index 0000000000..5b04233845 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/ge_amd64.go @@ -0,0 +1,29 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !noasm && !gccgo && !safe +// +build !noasm,!gccgo,!safe + +package f64 + +// Ger performs the rank-one operation +// +// A += alpha * x * yᵀ +// +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +func Ger(m, n uintptr, alpha float64, x []float64, incX uintptr, y []float64, incY uintptr, a []float64, lda uintptr) + +// GemvN computes +// +// y = alpha * A * x + beta * y +// +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. +func GemvN(m, n uintptr, alpha float64, a []float64, lda uintptr, x []float64, incX uintptr, beta float64, y []float64, incY uintptr) + +// GemvT computes +// +// y = alpha * Aᵀ * x + beta * y +// +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. 
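+//
+// GemvT treats beta == 0 as a hard zero: y is overwritten without being
+// read, so NaN or Inf values already in y do not propagate. GemvN does
+// the same.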
+func GemvT(m, n uintptr, alpha float64, a []float64, lda uintptr, x []float64, incX uintptr, beta float64, y []float64, incY uintptr) diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/ge_noasm.go b/vendor/gonum.org/v1/gonum/internal/asm/f64/ge_noasm.go new file mode 100644 index 0000000000..e8dee0511b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/ge_noasm.go @@ -0,0 +1,125 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !amd64 || noasm || gccgo || safe +// +build !amd64 noasm gccgo safe + +package f64 + +// Ger performs the rank-one operation +// +// A += alpha * x * yᵀ +// +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +func Ger(m, n uintptr, alpha float64, x []float64, incX uintptr, y []float64, incY uintptr, a []float64, lda uintptr) { + if incX == 1 && incY == 1 { + x = x[:m] + y = y[:n] + for i, xv := range x { + AxpyUnitary(alpha*xv, y, a[uintptr(i)*lda:uintptr(i)*lda+n]) + } + return + } + + var ky, kx uintptr + if int(incY) < 0 { + ky = uintptr(-int(n-1) * int(incY)) + } + if int(incX) < 0 { + kx = uintptr(-int(m-1) * int(incX)) + } + + ix := kx + for i := 0; i < int(m); i++ { + AxpyInc(alpha*x[ix], y, a[uintptr(i)*lda:uintptr(i)*lda+n], n, incY, 1, ky, 0) + ix += incX + } +} + +// GemvN computes +// +// y = alpha * A * x + beta * y +// +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. +func GemvN(m, n uintptr, alpha float64, a []float64, lda uintptr, x []float64, incX uintptr, beta float64, y []float64, incY uintptr) { + var kx, ky, i uintptr + if int(incX) < 0 { + kx = uintptr(-int(n-1) * int(incX)) + } + if int(incY) < 0 { + ky = uintptr(-int(m-1) * int(incY)) + } + + if incX == 1 && incY == 1 { + if beta == 0 { + for i = 0; i < m; i++ { + y[i] = alpha * DotUnitary(a[lda*i:lda*i+n], x) + } + return + } + for i = 0; i < m; i++ { + y[i] = y[i]*beta + alpha*DotUnitary(a[lda*i:lda*i+n], x) + } + return + } + iy := ky + if beta == 0 { + for i = 0; i < m; i++ { + y[iy] = alpha * DotInc(x, a[lda*i:lda*i+n], n, incX, 1, kx, 0) + iy += incY + } + return + } + for i = 0; i < m; i++ { + y[iy] = y[iy]*beta + alpha*DotInc(x, a[lda*i:lda*i+n], n, incX, 1, kx, 0) + iy += incY + } +} + +// GemvT computes +// +// y = alpha * Aᵀ * x + beta * y +// +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. 
+func GemvT(m, n uintptr, alpha float64, a []float64, lda uintptr, x []float64, incX uintptr, beta float64, y []float64, incY uintptr) { + var kx, ky, i uintptr + if int(incX) < 0 { + kx = uintptr(-int(m-1) * int(incX)) + } + if int(incY) < 0 { + ky = uintptr(-int(n-1) * int(incY)) + } + switch { + case beta == 0: // beta == 0 is special-cased to memclear + if incY == 1 { + for i := range y { + y[i] = 0 + } + } else { + iy := ky + for i := 0; i < int(n); i++ { + y[iy] = 0 + iy += incY + } + } + case int(incY) < 0: + ScalInc(beta, y, n, uintptr(int(-incY))) + case incY == 1: + ScalUnitary(beta, y[:n]) + default: + ScalInc(beta, y, n, incY) + } + + if incX == 1 && incY == 1 { + for i = 0; i < m; i++ { + AxpyUnitaryTo(y, alpha*x[i], a[lda*i:lda*i+n], y) + } + return + } + ix := kx + for i = 0; i < m; i++ { + AxpyInc(alpha*x[ix], a[lda*i:lda*i+n], y, n, 1, incY, 0, ky) + ix += incX + } +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/gemvN_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/gemvN_amd64.s new file mode 100644 index 0000000000..917e0e30e1 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/gemvN_amd64.s @@ -0,0 +1,685 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define SIZE 8 + +#define M_DIM m+0(FP) +#define M CX +#define N_DIM n+8(FP) +#define N BX + +#define TMP1 R14 +#define TMP2 R15 + +#define X_PTR SI +#define X x_base+56(FP) +#define INC_X R8 +#define INC3_X R9 + +#define Y_PTR DX +#define Y y_base+96(FP) +#define INC_Y R10 +#define INC3_Y R11 + +#define A_ROW AX +#define A_PTR DI +#define LDA R12 +#define LDA3 R13 + +#define ALPHA X15 +#define BETA X14 + +#define INIT4 \ + XORPS X0, X0 \ + XORPS X1, X1 \ + XORPS X2, X2 \ + XORPS X3, X3 + +#define INIT2 \ + XORPS X0, X0 \ + XORPS X1, X1 + +#define INIT1 \ + XORPS X0, X0 + +#define KERNEL_LOAD4 \ + MOVUPS (X_PTR), X12 \ + MOVUPS 2*SIZE(X_PTR), X13 + +#define KERNEL_LOAD2 \ + MOVUPS (X_PTR), X12 + +#define KERNEL_LOAD4_INC \ + MOVSD (X_PTR), X12 \ + MOVHPD (X_PTR)(INC_X*1), X12 \ + MOVSD (X_PTR)(INC_X*2), X13 \ + MOVHPD (X_PTR)(INC3_X*1), X13 + +#define KERNEL_LOAD2_INC \ + MOVSD (X_PTR), X12 \ + MOVHPD (X_PTR)(INC_X*1), X12 + +#define KERNEL_4x4 \ + MOVUPS (A_PTR), X4 \ + MOVUPS 2*SIZE(A_PTR), X5 \ + MOVUPS (A_PTR)(LDA*1), X6 \ + MOVUPS 2*SIZE(A_PTR)(LDA*1), X7 \ + MOVUPS (A_PTR)(LDA*2), X8 \ + MOVUPS 2*SIZE(A_PTR)(LDA*2), X9 \ + MOVUPS (A_PTR)(LDA3*1), X10 \ + MOVUPS 2*SIZE(A_PTR)(LDA3*1), X11 \ + MULPD X12, X4 \ + MULPD X13, X5 \ + MULPD X12, X6 \ + MULPD X13, X7 \ + MULPD X12, X8 \ + MULPD X13, X9 \ + MULPD X12, X10 \ + MULPD X13, X11 \ + ADDPD X4, X0 \ + ADDPD X5, X0 \ + ADDPD X6, X1 \ + ADDPD X7, X1 \ + ADDPD X8, X2 \ + ADDPD X9, X2 \ + ADDPD X10, X3 \ + ADDPD X11, X3 \ + ADDQ $4*SIZE, A_PTR + +#define KERNEL_4x2 \ + MOVUPS (A_PTR), X4 \ + MOVUPS (A_PTR)(LDA*1), X5 \ + MOVUPS (A_PTR)(LDA*2), X6 \ + MOVUPS (A_PTR)(LDA3*1), X7 \ + MULPD X12, X4 \ + MULPD X12, X5 \ + MULPD X12, X6 \ + MULPD X12, X7 \ + ADDPD X4, X0 \ + ADDPD X5, X1 \ + ADDPD X6, X2 \ + ADDPD X7, X3 \ + ADDQ $2*SIZE, A_PTR + +#define KERNEL_4x1 \ + MOVDDUP (X_PTR), X12 \ + MOVSD (A_PTR), X4 \ + MOVHPD (A_PTR)(LDA*1), X4 \ + MOVSD (A_PTR)(LDA*2), X5 \ + MOVHPD (A_PTR)(LDA3*1), X5 \ + MULPD X12, X4 \ + MULPD X12, X5 \ + ADDPD X4, X0 \ + ADDPD X5, X2 \ + ADDQ $SIZE, A_PTR + +#define STORE4 \ + MOVUPS (Y_PTR), X4 \ + MOVUPS 2*SIZE(Y_PTR), X5 \ + MULPD 
ALPHA, X0 \ + MULPD ALPHA, X2 \ + MULPD BETA, X4 \ + MULPD BETA, X5 \ + ADDPD X0, X4 \ + ADDPD X2, X5 \ + MOVUPS X4, (Y_PTR) \ + MOVUPS X5, 2*SIZE(Y_PTR) + +#define STORE4_INC \ + MOVSD (Y_PTR), X4 \ + MOVHPD (Y_PTR)(INC_Y*1), X4 \ + MOVSD (Y_PTR)(INC_Y*2), X5 \ + MOVHPD (Y_PTR)(INC3_Y*1), X5 \ + MULPD ALPHA, X0 \ + MULPD ALPHA, X2 \ + MULPD BETA, X4 \ + MULPD BETA, X5 \ + ADDPD X0, X4 \ + ADDPD X2, X5 \ + MOVLPD X4, (Y_PTR) \ + MOVHPD X4, (Y_PTR)(INC_Y*1) \ + MOVLPD X5, (Y_PTR)(INC_Y*2) \ + MOVHPD X5, (Y_PTR)(INC3_Y*1) + +#define KERNEL_2x4 \ + MOVUPS (A_PTR), X8 \ + MOVUPS 2*SIZE(A_PTR), X9 \ + MOVUPS (A_PTR)(LDA*1), X10 \ + MOVUPS 2*SIZE(A_PTR)(LDA*1), X11 \ + MULPD X12, X8 \ + MULPD X13, X9 \ + MULPD X12, X10 \ + MULPD X13, X11 \ + ADDPD X8, X0 \ + ADDPD X10, X1 \ + ADDPD X9, X0 \ + ADDPD X11, X1 \ + ADDQ $4*SIZE, A_PTR + +#define KERNEL_2x2 \ + MOVUPS (A_PTR), X8 \ + MOVUPS (A_PTR)(LDA*1), X9 \ + MULPD X12, X8 \ + MULPD X12, X9 \ + ADDPD X8, X0 \ + ADDPD X9, X1 \ + ADDQ $2*SIZE, A_PTR + +#define KERNEL_2x1 \ + MOVDDUP (X_PTR), X12 \ + MOVSD (A_PTR), X8 \ + MOVHPD (A_PTR)(LDA*1), X8 \ + MULPD X12, X8 \ + ADDPD X8, X0 \ + ADDQ $SIZE, A_PTR + +#define STORE2 \ + MOVUPS (Y_PTR), X4 \ + MULPD ALPHA, X0 \ + MULPD BETA, X4 \ + ADDPD X0, X4 \ + MOVUPS X4, (Y_PTR) + +#define STORE2_INC \ + MOVSD (Y_PTR), X4 \ + MOVHPD (Y_PTR)(INC_Y*1), X4 \ + MULPD ALPHA, X0 \ + MULPD BETA, X4 \ + ADDPD X0, X4 \ + MOVSD X4, (Y_PTR) \ + MOVHPD X4, (Y_PTR)(INC_Y*1) + +#define KERNEL_1x4 \ + MOVUPS (A_PTR), X8 \ + MOVUPS 2*SIZE(A_PTR), X9 \ + MULPD X12, X8 \ + MULPD X13, X9 \ + ADDPD X8, X0 \ + ADDPD X9, X0 \ + ADDQ $4*SIZE, A_PTR + +#define KERNEL_1x2 \ + MOVUPS (A_PTR), X8 \ + MULPD X12, X8 \ + ADDPD X8, X0 \ + ADDQ $2*SIZE, A_PTR + +#define KERNEL_1x1 \ + MOVSD (X_PTR), X12 \ + MOVSD (A_PTR), X8 \ + MULSD X12, X8 \ + ADDSD X8, X0 \ + ADDQ $SIZE, A_PTR + +#define STORE1 \ + HADDPD X0, X0 \ + MOVSD (Y_PTR), X4 \ + MULSD ALPHA, X0 \ + MULSD BETA, X4 \ + ADDSD X0, X4 \ + MOVSD X4, (Y_PTR) + +// func GemvN(m, n int, +// alpha float64, +// a []float64, lda int, +// x []float64, incX int, +// beta float64, +// y []float64, incY int) +TEXT ·GemvN(SB), NOSPLIT, $32-128 + MOVQ M_DIM, M + MOVQ N_DIM, N + CMPQ M, $0 + JE end + CMPQ N, $0 + JE end + + MOVDDUP alpha+16(FP), ALPHA + MOVDDUP beta+88(FP), BETA + + MOVQ x_base+56(FP), X_PTR + MOVQ y_base+96(FP), Y_PTR + MOVQ a_base+24(FP), A_ROW + MOVQ incY+120(FP), INC_Y + MOVQ lda+48(FP), LDA // LDA = LDA * sizeof(float64) + SHLQ $3, LDA + LEAQ (LDA)(LDA*2), LDA3 // LDA3 = LDA * 3 + MOVQ A_ROW, A_PTR + + XORQ TMP2, TMP2 + MOVQ M, TMP1 + SUBQ $1, TMP1 + IMULQ INC_Y, TMP1 + NEGQ TMP1 + CMPQ INC_Y, $0 + CMOVQLT TMP1, TMP2 + LEAQ (Y_PTR)(TMP2*SIZE), Y_PTR + MOVQ Y_PTR, Y + + SHLQ $3, INC_Y // INC_Y = incY * sizeof(float64) + LEAQ (INC_Y)(INC_Y*2), INC3_Y // INC3_Y = INC_Y * 3 + + MOVSD $0.0, X0 + COMISD BETA, X0 + JNE gemv_start // if beta != 0 { goto gemv_start } + +gemv_clear: // beta == 0 is special cased to clear memory (no nan handling) + XORPS X0, X0 + XORPS X1, X1 + XORPS X2, X2 + XORPS X3, X3 + + CMPQ incY+120(FP), $1 // Check for dense vector X (fast-path) + JNE inc_clear + + SHRQ $3, M + JZ clear4 + +clear8: + MOVUPS X0, (Y_PTR) + MOVUPS X1, 16(Y_PTR) + MOVUPS X2, 32(Y_PTR) + MOVUPS X3, 48(Y_PTR) + ADDQ $8*SIZE, Y_PTR + DECQ M + JNZ clear8 + +clear4: + TESTQ $4, M_DIM + JZ clear2 + MOVUPS X0, (Y_PTR) + MOVUPS X1, 16(Y_PTR) + ADDQ $4*SIZE, Y_PTR + +clear2: + TESTQ $2, M_DIM + JZ clear1 + MOVUPS X0, (Y_PTR) + ADDQ $2*SIZE, Y_PTR + +clear1: + TESTQ $1, M_DIM + JZ prep_end 
+ MOVSD X0, (Y_PTR) + + JMP prep_end + +inc_clear: + SHRQ $2, M + JZ inc_clear2 + +inc_clear4: + MOVSD X0, (Y_PTR) + MOVSD X1, (Y_PTR)(INC_Y*1) + MOVSD X2, (Y_PTR)(INC_Y*2) + MOVSD X3, (Y_PTR)(INC3_Y*1) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + DECQ M + JNZ inc_clear4 + +inc_clear2: + TESTQ $2, M_DIM + JZ inc_clear1 + MOVSD X0, (Y_PTR) + MOVSD X1, (Y_PTR)(INC_Y*1) + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_clear1: + TESTQ $1, M_DIM + JZ prep_end + MOVSD X0, (Y_PTR) + +prep_end: + MOVQ Y, Y_PTR + MOVQ M_DIM, M + +gemv_start: + CMPQ incX+80(FP), $1 // Check for dense vector X (fast-path) + JNE inc + + SHRQ $2, M + JZ r2 + +r4: + // LOAD 4 + INIT4 + + MOVQ N_DIM, N + SHRQ $2, N + JZ r4c2 + +r4c4: + // 4x4 KERNEL + KERNEL_LOAD4 + KERNEL_4x4 + + ADDQ $4*SIZE, X_PTR + + DECQ N + JNZ r4c4 + +r4c2: + TESTQ $2, N_DIM + JZ r4c1 + + // 4x2 KERNEL + KERNEL_LOAD2 + KERNEL_4x2 + + ADDQ $2*SIZE, X_PTR + +r4c1: + HADDPD X1, X0 + HADDPD X3, X2 + TESTQ $1, N_DIM + JZ r4end + + // 4x1 KERNEL + KERNEL_4x1 + + ADDQ $SIZE, X_PTR + +r4end: + CMPQ INC_Y, $SIZE + JNZ r4st_inc + + STORE4 + ADDQ $4*SIZE, Y_PTR + JMP r4inc + +r4st_inc: + STORE4_INC + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + +r4inc: + MOVQ X, X_PTR + LEAQ (A_ROW)(LDA*4), A_ROW + MOVQ A_ROW, A_PTR + + DECQ M + JNZ r4 + +r2: + TESTQ $2, M_DIM + JZ r1 + + // LOAD 2 + INIT2 + + MOVQ N_DIM, N + SHRQ $2, N + JZ r2c2 + +r2c4: + // 2x4 KERNEL + KERNEL_LOAD4 + KERNEL_2x4 + + ADDQ $4*SIZE, X_PTR + + DECQ N + JNZ r2c4 + +r2c2: + TESTQ $2, N_DIM + JZ r2c1 + + // 2x2 KERNEL + KERNEL_LOAD2 + KERNEL_2x2 + + ADDQ $2*SIZE, X_PTR + +r2c1: + HADDPD X1, X0 + TESTQ $1, N_DIM + JZ r2end + + // 2x1 KERNEL + KERNEL_2x1 + + ADDQ $SIZE, X_PTR + +r2end: + CMPQ INC_Y, $SIZE + JNE r2st_inc + + STORE2 + ADDQ $2*SIZE, Y_PTR + JMP r2inc + +r2st_inc: + STORE2_INC + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +r2inc: + MOVQ X, X_PTR + LEAQ (A_ROW)(LDA*2), A_ROW + MOVQ A_ROW, A_PTR + +r1: + TESTQ $1, M_DIM + JZ end + + // LOAD 1 + INIT1 + + MOVQ N_DIM, N + SHRQ $2, N + JZ r1c2 + +r1c4: + // 1x4 KERNEL + KERNEL_LOAD4 + KERNEL_1x4 + + ADDQ $4*SIZE, X_PTR + + DECQ N + JNZ r1c4 + +r1c2: + TESTQ $2, N_DIM + JZ r1c1 + + // 1x2 KERNEL + KERNEL_LOAD2 + KERNEL_1x2 + + ADDQ $2*SIZE, X_PTR + +r1c1: + + TESTQ $1, N_DIM + JZ r1end + + // 1x1 KERNEL + KERNEL_1x1 + +r1end: + STORE1 + +end: + RET + +inc: // Algorithm for incX != 1 ( split loads in kernel ) + MOVQ incX+80(FP), INC_X // INC_X = incX + + XORQ TMP2, TMP2 // TMP2 = 0 + MOVQ N, TMP1 // TMP1 = N + SUBQ $1, TMP1 // TMP1 -= 1 + NEGQ TMP1 // TMP1 = -TMP1 + IMULQ INC_X, TMP1 // TMP1 *= INC_X + CMPQ INC_X, $0 // if INC_X < 0 { TMP2 = TMP1 } + CMOVQLT TMP1, TMP2 + LEAQ (X_PTR)(TMP2*SIZE), X_PTR // X_PTR = X_PTR[TMP2] + MOVQ X_PTR, X // X = X_PTR + + SHLQ $3, INC_X + LEAQ (INC_X)(INC_X*2), INC3_X // INC3_X = INC_X * 3 + + SHRQ $2, M + JZ inc_r2 + +inc_r4: + // LOAD 4 + INIT4 + + MOVQ N_DIM, N + SHRQ $2, N + JZ inc_r4c2 + +inc_r4c4: + // 4x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_4x4 + + LEAQ (X_PTR)(INC_X*4), X_PTR + + DECQ N + JNZ inc_r4c4 + +inc_r4c2: + TESTQ $2, N_DIM + JZ inc_r4c1 + + // 4x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_4x2 + + LEAQ (X_PTR)(INC_X*2), X_PTR + +inc_r4c1: + HADDPD X1, X0 + HADDPD X3, X2 + TESTQ $1, N_DIM + JZ inc_r4end + + // 4x1 KERNEL + KERNEL_4x1 + + ADDQ INC_X, X_PTR + +inc_r4end: + CMPQ INC_Y, $SIZE + JNE inc_r4st_inc + + STORE4 + ADDQ $4*SIZE, Y_PTR + JMP inc_r4inc + +inc_r4st_inc: + STORE4_INC + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + +inc_r4inc: + MOVQ X, X_PTR + LEAQ (A_ROW)(LDA*4), A_ROW + MOVQ A_ROW, A_PTR + + DECQ M + JNZ inc_r4 + +inc_r2: + TESTQ $2, M_DIM + JZ 
inc_r1 + + // LOAD 2 + INIT2 + + MOVQ N_DIM, N + SHRQ $2, N + JZ inc_r2c2 + +inc_r2c4: + // 2x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_2x4 + + LEAQ (X_PTR)(INC_X*4), X_PTR + DECQ N + JNZ inc_r2c4 + +inc_r2c2: + TESTQ $2, N_DIM + JZ inc_r2c1 + + // 2x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_2x2 + + LEAQ (X_PTR)(INC_X*2), X_PTR + +inc_r2c1: + HADDPD X1, X0 + TESTQ $1, N_DIM + JZ inc_r2end + + // 2x1 KERNEL + KERNEL_2x1 + + ADDQ INC_X, X_PTR + +inc_r2end: + CMPQ INC_Y, $SIZE + JNE inc_r2st_inc + + STORE2 + ADDQ $2*SIZE, Y_PTR + JMP inc_r2inc + +inc_r2st_inc: + STORE2_INC + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_r2inc: + MOVQ X, X_PTR + LEAQ (A_ROW)(LDA*2), A_ROW + MOVQ A_ROW, A_PTR + +inc_r1: + TESTQ $1, M_DIM + JZ inc_end + + // LOAD 1 + INIT1 + + MOVQ N_DIM, N + SHRQ $2, N + JZ inc_r1c2 + +inc_r1c4: + // 1x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_1x4 + + LEAQ (X_PTR)(INC_X*4), X_PTR + DECQ N + JNZ inc_r1c4 + +inc_r1c2: + TESTQ $2, N_DIM + JZ inc_r1c1 + + // 1x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_1x2 + + LEAQ (X_PTR)(INC_X*2), X_PTR + +inc_r1c1: + TESTQ $1, N_DIM + JZ inc_r1end + + // 1x1 KERNEL + KERNEL_1x1 + +inc_r1end: + STORE1 + +inc_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/gemvT_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/gemvT_amd64.s new file mode 100644 index 0000000000..040710009e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/gemvT_amd64.s @@ -0,0 +1,745 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define SIZE 8 + +#define M_DIM n+8(FP) +#define M CX +#define N_DIM m+0(FP) +#define N BX + +#define TMP1 R14 +#define TMP2 R15 + +#define X_PTR SI +#define X x_base+56(FP) +#define Y_PTR DX +#define Y y_base+96(FP) +#define A_ROW AX +#define A_PTR DI + +#define INC_X R8 +#define INC3_X R9 + +#define INC_Y R10 +#define INC3_Y R11 + +#define LDA R12 +#define LDA3 R13 + +#define ALPHA X15 +#define BETA X14 + +#define INIT4 \ + MOVDDUP (X_PTR), X8 \ + MOVDDUP (X_PTR)(INC_X*1), X9 \ + MOVDDUP (X_PTR)(INC_X*2), X10 \ + MOVDDUP (X_PTR)(INC3_X*1), X11 \ + MULPD ALPHA, X8 \ + MULPD ALPHA, X9 \ + MULPD ALPHA, X10 \ + MULPD ALPHA, X11 + +#define INIT2 \ + MOVDDUP (X_PTR), X8 \ + MOVDDUP (X_PTR)(INC_X*1), X9 \ + MULPD ALPHA, X8 \ + MULPD ALPHA, X9 + +#define INIT1 \ + MOVDDUP (X_PTR), X8 \ + MULPD ALPHA, X8 + +#define KERNEL_LOAD4 \ + MOVUPS (Y_PTR), X0 \ + MOVUPS 2*SIZE(Y_PTR), X1 + +#define KERNEL_LOAD2 \ + MOVUPS (Y_PTR), X0 + +#define KERNEL_LOAD4_INC \ + MOVSD (Y_PTR), X0 \ + MOVHPD (Y_PTR)(INC_Y*1), X0 \ + MOVSD (Y_PTR)(INC_Y*2), X1 \ + MOVHPD (Y_PTR)(INC3_Y*1), X1 + +#define KERNEL_LOAD2_INC \ + MOVSD (Y_PTR), X0 \ + MOVHPD (Y_PTR)(INC_Y*1), X0 + +#define KERNEL_4x4 \ + MOVUPS (A_PTR), X4 \ + MOVUPS 2*SIZE(A_PTR), X5 \ + MOVUPS (A_PTR)(LDA*1), X6 \ + MOVUPS 2*SIZE(A_PTR)(LDA*1), X7 \ + MULPD X8, X4 \ + MULPD X8, X5 \ + MULPD X9, X6 \ + MULPD X9, X7 \ + ADDPD X4, X0 \ + ADDPD X5, X1 \ + ADDPD X6, X0 \ + ADDPD X7, X1 \ + MOVUPS (A_PTR)(LDA*2), X4 \ + MOVUPS 2*SIZE(A_PTR)(LDA*2), X5 \ + MOVUPS (A_PTR)(LDA3*1), X6 \ + MOVUPS 2*SIZE(A_PTR)(LDA3*1), X7 \ + MULPD X10, X4 \ + MULPD X10, X5 \ + MULPD X11, X6 \ + MULPD X11, X7 \ + ADDPD X4, X0 \ + ADDPD X5, X1 \ + ADDPD X6, X0 \ + ADDPD X7, X1 \ + ADDQ $4*SIZE, A_PTR + +#define KERNEL_4x2 \ + MOVUPS (A_PTR), X4 \ + MOVUPS 2*SIZE(A_PTR), X5 \ + MOVUPS (A_PTR)(LDA*1), X6 \ + MOVUPS 2*SIZE(A_PTR)(LDA*1), X7 \ + MULPD X8, X4 \ + MULPD 
X8, X5 \ + MULPD X9, X6 \ + MULPD X9, X7 \ + ADDPD X4, X0 \ + ADDPD X5, X1 \ + ADDPD X6, X0 \ + ADDPD X7, X1 \ + ADDQ $4*SIZE, A_PTR + +#define KERNEL_4x1 \ + MOVUPS (A_PTR), X4 \ + MOVUPS 2*SIZE(A_PTR), X5 \ + MULPD X8, X4 \ + MULPD X8, X5 \ + ADDPD X4, X0 \ + ADDPD X5, X1 \ + ADDQ $4*SIZE, A_PTR + +#define STORE4 \ + MOVUPS X0, (Y_PTR) \ + MOVUPS X1, 2*SIZE(Y_PTR) + +#define STORE4_INC \ + MOVLPD X0, (Y_PTR) \ + MOVHPD X0, (Y_PTR)(INC_Y*1) \ + MOVLPD X1, (Y_PTR)(INC_Y*2) \ + MOVHPD X1, (Y_PTR)(INC3_Y*1) + +#define KERNEL_2x4 \ + MOVUPS (A_PTR), X4 \ + MOVUPS (A_PTR)(LDA*1), X5 \ + MOVUPS (A_PTR)(LDA*2), X6 \ + MOVUPS (A_PTR)(LDA3*1), X7 \ + MULPD X8, X4 \ + MULPD X9, X5 \ + MULPD X10, X6 \ + MULPD X11, X7 \ + ADDPD X4, X0 \ + ADDPD X5, X0 \ + ADDPD X6, X0 \ + ADDPD X7, X0 \ + ADDQ $2*SIZE, A_PTR + +#define KERNEL_2x2 \ + MOVUPS (A_PTR), X4 \ + MOVUPS (A_PTR)(LDA*1), X5 \ + MULPD X8, X4 \ + MULPD X9, X5 \ + ADDPD X4, X0 \ + ADDPD X5, X0 \ + ADDQ $2*SIZE, A_PTR + +#define KERNEL_2x1 \ + MOVUPS (A_PTR), X4 \ + MULPD X8, X4 \ + ADDPD X4, X0 \ + ADDQ $2*SIZE, A_PTR + +#define STORE2 \ + MOVUPS X0, (Y_PTR) + +#define STORE2_INC \ + MOVLPD X0, (Y_PTR) \ + MOVHPD X0, (Y_PTR)(INC_Y*1) + +#define KERNEL_1x4 \ + MOVSD (Y_PTR), X0 \ + MOVSD (A_PTR), X4 \ + MOVSD (A_PTR)(LDA*1), X5 \ + MOVSD (A_PTR)(LDA*2), X6 \ + MOVSD (A_PTR)(LDA3*1), X7 \ + MULSD X8, X4 \ + MULSD X9, X5 \ + MULSD X10, X6 \ + MULSD X11, X7 \ + ADDSD X4, X0 \ + ADDSD X5, X0 \ + ADDSD X6, X0 \ + ADDSD X7, X0 \ + MOVSD X0, (Y_PTR) \ + ADDQ $SIZE, A_PTR + +#define KERNEL_1x2 \ + MOVSD (Y_PTR), X0 \ + MOVSD (A_PTR), X4 \ + MOVSD (A_PTR)(LDA*1), X5 \ + MULSD X8, X4 \ + MULSD X9, X5 \ + ADDSD X4, X0 \ + ADDSD X5, X0 \ + MOVSD X0, (Y_PTR) \ + ADDQ $SIZE, A_PTR + +#define KERNEL_1x1 \ + MOVSD (Y_PTR), X0 \ + MOVSD (A_PTR), X4 \ + MULSD X8, X4 \ + ADDSD X4, X0 \ + MOVSD X0, (Y_PTR) \ + ADDQ $SIZE, A_PTR + +#define SCALE_8(PTR, SCAL) \ + MOVUPS (PTR), X0 \ + MOVUPS 16(PTR), X1 \ + MOVUPS 32(PTR), X2 \ + MOVUPS 48(PTR), X3 \ + MULPD SCAL, X0 \ + MULPD SCAL, X1 \ + MULPD SCAL, X2 \ + MULPD SCAL, X3 \ + MOVUPS X0, (PTR) \ + MOVUPS X1, 16(PTR) \ + MOVUPS X2, 32(PTR) \ + MOVUPS X3, 48(PTR) + +#define SCALE_4(PTR, SCAL) \ + MOVUPS (PTR), X0 \ + MOVUPS 16(PTR), X1 \ + MULPD SCAL, X0 \ + MULPD SCAL, X1 \ + MOVUPS X0, (PTR) \ + MOVUPS X1, 16(PTR) \ + +#define SCALE_2(PTR, SCAL) \ + MOVUPS (PTR), X0 \ + MULPD SCAL, X0 \ + MOVUPS X0, (PTR) \ + +#define SCALE_1(PTR, SCAL) \ + MOVSD (PTR), X0 \ + MULSD SCAL, X0 \ + MOVSD X0, (PTR) \ + +#define SCALEINC_4(PTR, INC, INC3, SCAL) \ + MOVSD (PTR), X0 \ + MOVSD (PTR)(INC*1), X1 \ + MOVSD (PTR)(INC*2), X2 \ + MOVSD (PTR)(INC3*1), X3 \ + MULSD SCAL, X0 \ + MULSD SCAL, X1 \ + MULSD SCAL, X2 \ + MULSD SCAL, X3 \ + MOVSD X0, (PTR) \ + MOVSD X1, (PTR)(INC*1) \ + MOVSD X2, (PTR)(INC*2) \ + MOVSD X3, (PTR)(INC3*1) + +#define SCALEINC_2(PTR, INC, SCAL) \ + MOVSD (PTR), X0 \ + MOVSD (PTR)(INC*1), X1 \ + MULSD SCAL, X0 \ + MULSD SCAL, X1 \ + MOVSD X0, (PTR) \ + MOVSD X1, (PTR)(INC*1) + +// func GemvT(m, n int, +// alpha float64, +// a []float64, lda int, +// x []float64, incX int, +// beta float64, +// y []float64, incY int) +TEXT ·GemvT(SB), NOSPLIT, $32-128 + MOVQ M_DIM, M + MOVQ N_DIM, N + CMPQ M, $0 + JE end + CMPQ N, $0 + JE end + + MOVDDUP alpha+16(FP), ALPHA + + MOVQ x_base+56(FP), X_PTR + MOVQ y_base+96(FP), Y_PTR + MOVQ a_base+24(FP), A_ROW + MOVQ incY+120(FP), INC_Y // INC_Y = incY * sizeof(float64) + MOVQ lda+48(FP), LDA // LDA = LDA * sizeof(float64) + SHLQ $3, LDA + LEAQ (LDA)(LDA*2), LDA3 // LDA3 = LDA * 3 + 
MOVQ A_ROW, A_PTR + + MOVQ incX+80(FP), INC_X // INC_X = incX * sizeof(float64) + + XORQ TMP2, TMP2 + MOVQ N, TMP1 + SUBQ $1, TMP1 + NEGQ TMP1 + IMULQ INC_X, TMP1 + CMPQ INC_X, $0 + CMOVQLT TMP1, TMP2 + LEAQ (X_PTR)(TMP2*SIZE), X_PTR + MOVQ X_PTR, X + + SHLQ $3, INC_X + LEAQ (INC_X)(INC_X*2), INC3_X // INC3_X = INC_X * 3 + + CMPQ incY+120(FP), $1 // Check for dense vector Y (fast-path) + JNE inc + + MOVSD $1.0, X0 + COMISD beta+88(FP), X0 + JE gemv_start + + MOVSD $0.0, X0 + COMISD beta+88(FP), X0 + JE gemv_clear + + MOVDDUP beta+88(FP), BETA + SHRQ $3, M + JZ scal4 + +scal8: + SCALE_8(Y_PTR, BETA) + ADDQ $8*SIZE, Y_PTR + DECQ M + JNZ scal8 + +scal4: + TESTQ $4, M_DIM + JZ scal2 + SCALE_4(Y_PTR, BETA) + ADDQ $4*SIZE, Y_PTR + +scal2: + TESTQ $2, M_DIM + JZ scal1 + SCALE_2(Y_PTR, BETA) + ADDQ $2*SIZE, Y_PTR + +scal1: + TESTQ $1, M_DIM + JZ prep_end + SCALE_1(Y_PTR, BETA) + + JMP prep_end + +gemv_clear: // beta == 0 is special cased to clear memory (no nan handling) + XORPS X0, X0 + XORPS X1, X1 + XORPS X2, X2 + XORPS X3, X3 + + SHRQ $3, M + JZ clear4 + +clear8: + MOVUPS X0, (Y_PTR) + MOVUPS X1, 16(Y_PTR) + MOVUPS X2, 32(Y_PTR) + MOVUPS X3, 48(Y_PTR) + ADDQ $8*SIZE, Y_PTR + DECQ M + JNZ clear8 + +clear4: + TESTQ $4, M_DIM + JZ clear2 + MOVUPS X0, (Y_PTR) + MOVUPS X1, 16(Y_PTR) + ADDQ $4*SIZE, Y_PTR + +clear2: + TESTQ $2, M_DIM + JZ clear1 + MOVUPS X0, (Y_PTR) + ADDQ $2*SIZE, Y_PTR + +clear1: + TESTQ $1, M_DIM + JZ prep_end + MOVSD X0, (Y_PTR) + +prep_end: + MOVQ Y, Y_PTR + MOVQ M_DIM, M + +gemv_start: + SHRQ $2, N + JZ c2 + +c4: + // LOAD 4 + INIT4 + + MOVQ M_DIM, M + SHRQ $2, M + JZ c4r2 + +c4r4: + // 4x4 KERNEL + KERNEL_LOAD4 + KERNEL_4x4 + STORE4 + + ADDQ $4*SIZE, Y_PTR + + DECQ M + JNZ c4r4 + +c4r2: + TESTQ $2, M_DIM + JZ c4r1 + + // 4x2 KERNEL + KERNEL_LOAD2 + KERNEL_2x4 + STORE2 + + ADDQ $2*SIZE, Y_PTR + +c4r1: + TESTQ $1, M_DIM + JZ c4end + + // 4x1 KERNEL + KERNEL_1x4 + + ADDQ $SIZE, Y_PTR + +c4end: + LEAQ (X_PTR)(INC_X*4), X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*4), A_ROW + MOVQ A_ROW, A_PTR + + DECQ N + JNZ c4 + +c2: + TESTQ $2, N_DIM + JZ c1 + + // LOAD 2 + INIT2 + + MOVQ M_DIM, M + SHRQ $2, M + JZ c2r2 + +c2r4: + // 2x4 KERNEL + KERNEL_LOAD4 + KERNEL_4x2 + STORE4 + + ADDQ $4*SIZE, Y_PTR + + DECQ M + JNZ c2r4 + +c2r2: + TESTQ $2, M_DIM + JZ c2r1 + + // 2x2 KERNEL + KERNEL_LOAD2 + KERNEL_2x2 + STORE2 + + ADDQ $2*SIZE, Y_PTR + +c2r1: + TESTQ $1, M_DIM + JZ c2end + + // 2x1 KERNEL + KERNEL_1x2 + + ADDQ $SIZE, Y_PTR + +c2end: + LEAQ (X_PTR)(INC_X*2), X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*2), A_ROW + MOVQ A_ROW, A_PTR + +c1: + TESTQ $1, N_DIM + JZ end + + // LOAD 1 + INIT1 + + MOVQ M_DIM, M + SHRQ $2, M + JZ c1r2 + +c1r4: + // 1x4 KERNEL + KERNEL_LOAD4 + KERNEL_4x1 + STORE4 + + ADDQ $4*SIZE, Y_PTR + + DECQ M + JNZ c1r4 + +c1r2: + TESTQ $2, M_DIM + JZ c1r1 + + // 1x2 KERNEL + KERNEL_LOAD2 + KERNEL_2x1 + STORE2 + + ADDQ $2*SIZE, Y_PTR + +c1r1: + TESTQ $1, M_DIM + JZ end + + // 1x1 KERNEL + KERNEL_1x1 + +end: + RET + +inc: // Algorithm for incX != 0 ( split loads in kernel ) + XORQ TMP2, TMP2 + MOVQ M, TMP1 + SUBQ $1, TMP1 + IMULQ INC_Y, TMP1 + NEGQ TMP1 + CMPQ INC_Y, $0 + CMOVQLT TMP1, TMP2 + LEAQ (Y_PTR)(TMP2*SIZE), Y_PTR + MOVQ Y_PTR, Y + + SHLQ $3, INC_Y + LEAQ (INC_Y)(INC_Y*2), INC3_Y // INC3_Y = INC_Y * 3 + + MOVSD $1.0, X0 + COMISD beta+88(FP), X0 + JE inc_gemv_start + + MOVSD $0.0, X0 + COMISD beta+88(FP), X0 + JE inc_gemv_clear + + MOVDDUP beta+88(FP), BETA + SHRQ $2, M + JZ inc_scal2 + +inc_scal4: + SCALEINC_4(Y_PTR, INC_Y, INC3_Y, BETA) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + DECQ 
M + JNZ inc_scal4 + +inc_scal2: + TESTQ $2, M_DIM + JZ inc_scal1 + + SCALEINC_2(Y_PTR, INC_Y, BETA) + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_scal1: + TESTQ $1, M_DIM + JZ inc_prep_end + SCALE_1(Y_PTR, BETA) + + JMP inc_prep_end + +inc_gemv_clear: // beta == 0 is special-cased to clear memory (no nan handling) + XORPS X0, X0 + XORPS X1, X1 + XORPS X2, X2 + XORPS X3, X3 + + SHRQ $2, M + JZ inc_clear2 + +inc_clear4: + MOVSD X0, (Y_PTR) + MOVSD X1, (Y_PTR)(INC_Y*1) + MOVSD X2, (Y_PTR)(INC_Y*2) + MOVSD X3, (Y_PTR)(INC3_Y*1) + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + DECQ M + JNZ inc_clear4 + +inc_clear2: + TESTQ $2, M_DIM + JZ inc_clear1 + MOVSD X0, (Y_PTR) + MOVSD X1, (Y_PTR)(INC_Y*1) + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_clear1: + TESTQ $1, M_DIM + JZ inc_prep_end + MOVSD X0, (Y_PTR) + +inc_prep_end: + MOVQ Y, Y_PTR + MOVQ M_DIM, M + +inc_gemv_start: + SHRQ $2, N + JZ inc_c2 + +inc_c4: + // LOAD 4 + INIT4 + + MOVQ M_DIM, M + SHRQ $2, M + JZ inc_c4r2 + +inc_c4r4: + // 4x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_4x4 + STORE4_INC + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + + DECQ M + JNZ inc_c4r4 + +inc_c4r2: + TESTQ $2, M_DIM + JZ inc_c4r1 + + // 4x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_2x4 + STORE2_INC + + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_c4r1: + TESTQ $1, M_DIM + JZ inc_c4end + + // 4x1 KERNEL + KERNEL_1x4 + + ADDQ INC_Y, Y_PTR + +inc_c4end: + LEAQ (X_PTR)(INC_X*4), X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*4), A_ROW + MOVQ A_ROW, A_PTR + + DECQ N + JNZ inc_c4 + +inc_c2: + TESTQ $2, N_DIM + JZ inc_c1 + + // LOAD 2 + INIT2 + + MOVQ M_DIM, M + SHRQ $2, M + JZ inc_c2r2 + +inc_c2r4: + // 2x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_4x2 + STORE4_INC + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + DECQ M + JNZ inc_c2r4 + +inc_c2r2: + TESTQ $2, M_DIM + JZ inc_c2r1 + + // 2x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_2x2 + STORE2_INC + + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_c2r1: + TESTQ $1, M_DIM + JZ inc_c2end + + // 2x1 KERNEL + KERNEL_1x2 + + ADDQ INC_Y, Y_PTR + +inc_c2end: + LEAQ (X_PTR)(INC_X*2), X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*2), A_ROW + MOVQ A_ROW, A_PTR + +inc_c1: + TESTQ $1, N_DIM + JZ inc_end + + // LOAD 1 + INIT1 + + MOVQ M_DIM, M + SHRQ $2, M + JZ inc_c1r2 + +inc_c1r4: + // 1x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_4x1 + STORE4_INC + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + DECQ M + JNZ inc_c1r4 + +inc_c1r2: + TESTQ $2, M_DIM + JZ inc_c1r1 + + // 1x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_2x1 + STORE2_INC + + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_c1r1: + TESTQ $1, M_DIM + JZ inc_end + + // 1x1 KERNEL + KERNEL_1x1 + +inc_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/ger_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/ger_amd64.s new file mode 100644 index 0000000000..8cae569138 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/ger_amd64.s @@ -0,0 +1,591 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
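+//
+// For review: for unit strides the kernel below is equivalent to the
+// pure-Go fallback in ge_noasm.go,
+//
+//	for i, xv := range x {
+//		AxpyUnitary(alpha*xv, y, a[uintptr(i)*lda:uintptr(i)*lda+n])
+//	}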
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define SIZE 8 + +#define M_DIM m+0(FP) +#define M CX +#define N_DIM n+8(FP) +#define N BX + +#define TMP1 R14 +#define TMP2 R15 + +#define X_PTR SI +#define Y y_base+56(FP) +#define Y_PTR DX +#define A_ROW AX +#define A_PTR DI + +#define INC_X R8 +#define INC3_X R9 + +#define INC_Y R10 +#define INC3_Y R11 + +#define LDA R12 +#define LDA3 R13 + +#define ALPHA X0 + +#define LOAD4 \ + PREFETCHNTA (X_PTR )(INC_X*8) \ + MOVDDUP (X_PTR), X1 \ + MOVDDUP (X_PTR)(INC_X*1), X2 \ + MOVDDUP (X_PTR)(INC_X*2), X3 \ + MOVDDUP (X_PTR)(INC3_X*1), X4 \ + MULPD ALPHA, X1 \ + MULPD ALPHA, X2 \ + MULPD ALPHA, X3 \ + MULPD ALPHA, X4 + +#define LOAD2 \ + MOVDDUP (X_PTR), X1 \ + MOVDDUP (X_PTR)(INC_X*1), X2 \ + MULPD ALPHA, X1 \ + MULPD ALPHA, X2 + +#define LOAD1 \ + MOVDDUP (X_PTR), X1 \ + MULPD ALPHA, X1 + +#define KERNEL_LOAD4 \ + MOVUPS (Y_PTR), X5 \ + MOVUPS 2*SIZE(Y_PTR), X6 + +#define KERNEL_LOAD4_INC \ + MOVLPD (Y_PTR), X5 \ + MOVHPD (Y_PTR)(INC_Y*1), X5 \ + MOVLPD (Y_PTR)(INC_Y*2), X6 \ + MOVHPD (Y_PTR)(INC3_Y*1), X6 + +#define KERNEL_LOAD2 \ + MOVUPS (Y_PTR), X5 + +#define KERNEL_LOAD2_INC \ + MOVLPD (Y_PTR), X5 \ + MOVHPD (Y_PTR)(INC_Y*1), X5 + +#define KERNEL_4x4 \ + MOVUPS X5, X7 \ + MOVUPS X6, X8 \ + MOVUPS X5, X9 \ + MOVUPS X6, X10 \ + MOVUPS X5, X11 \ + MOVUPS X6, X12 \ + MULPD X1, X5 \ + MULPD X1, X6 \ + MULPD X2, X7 \ + MULPD X2, X8 \ + MULPD X3, X9 \ + MULPD X3, X10 \ + MULPD X4, X11 \ + MULPD X4, X12 + +#define STORE_4x4 \ + MOVUPS (A_PTR), X13 \ + ADDPD X13, X5 \ + MOVUPS 2*SIZE(A_PTR), X14 \ + ADDPD X14, X6 \ + MOVUPS (A_PTR)(LDA*1), X15 \ + ADDPD X15, X7 \ + MOVUPS 2*SIZE(A_PTR)(LDA*1), X0 \ + ADDPD X0, X8 \ + MOVUPS (A_PTR)(LDA*2), X13 \ + ADDPD X13, X9 \ + MOVUPS 2*SIZE(A_PTR)(LDA*2), X14 \ + ADDPD X14, X10 \ + MOVUPS (A_PTR)(LDA3*1), X15 \ + ADDPD X15, X11 \ + MOVUPS 2*SIZE(A_PTR)(LDA3*1), X0 \ + ADDPD X0, X12 \ + MOVUPS X5, (A_PTR) \ + MOVUPS X6, 2*SIZE(A_PTR) \ + MOVUPS X7, (A_PTR)(LDA*1) \ + MOVUPS X8, 2*SIZE(A_PTR)(LDA*1) \ + MOVUPS X9, (A_PTR)(LDA*2) \ + MOVUPS X10, 2*SIZE(A_PTR)(LDA*2) \ + MOVUPS X11, (A_PTR)(LDA3*1) \ + MOVUPS X12, 2*SIZE(A_PTR)(LDA3*1) \ + ADDQ $4*SIZE, A_PTR + +#define KERNEL_4x2 \ + MOVUPS X5, X6 \ + MOVUPS X5, X7 \ + MOVUPS X5, X8 \ + MULPD X1, X5 \ + MULPD X2, X6 \ + MULPD X3, X7 \ + MULPD X4, X8 + +#define STORE_4x2 \ + MOVUPS (A_PTR), X9 \ + ADDPD X9, X5 \ + MOVUPS (A_PTR)(LDA*1), X10 \ + ADDPD X10, X6 \ + MOVUPS (A_PTR)(LDA*2), X11 \ + ADDPD X11, X7 \ + MOVUPS (A_PTR)(LDA3*1), X12 \ + ADDPD X12, X8 \ + MOVUPS X5, (A_PTR) \ + MOVUPS X6, (A_PTR)(LDA*1) \ + MOVUPS X7, (A_PTR)(LDA*2) \ + MOVUPS X8, (A_PTR)(LDA3*1) \ + ADDQ $2*SIZE, A_PTR + +#define KERNEL_4x1 \ + MOVSD (Y_PTR), X5 \ + MOVSD X5, X6 \ + MOVSD X5, X7 \ + MOVSD X5, X8 \ + MULSD X1, X5 \ + MULSD X2, X6 \ + MULSD X3, X7 \ + MULSD X4, X8 + +#define STORE_4x1 \ + ADDSD (A_PTR), X5 \ + ADDSD (A_PTR)(LDA*1), X6 \ + ADDSD (A_PTR)(LDA*2), X7 \ + ADDSD (A_PTR)(LDA3*1), X8 \ + MOVSD X5, (A_PTR) \ + MOVSD X6, (A_PTR)(LDA*1) \ + MOVSD X7, (A_PTR)(LDA*2) \ + MOVSD X8, (A_PTR)(LDA3*1) \ + ADDQ $SIZE, A_PTR + +#define KERNEL_2x4 \ + MOVUPS X5, X7 \ + MOVUPS X6, X8 \ + MULPD X1, X5 \ + MULPD X1, X6 \ + MULPD X2, X7 \ + MULPD X2, X8 + +#define STORE_2x4 \ + MOVUPS (A_PTR), X9 \ + ADDPD X9, X5 \ + MOVUPS 2*SIZE(A_PTR), X10 \ + ADDPD X10, X6 \ + MOVUPS (A_PTR)(LDA*1), X11 \ + ADDPD X11, X7 \ + MOVUPS 2*SIZE(A_PTR)(LDA*1), X12 \ + ADDPD X12, X8 \ + MOVUPS X5, (A_PTR) \ + MOVUPS X6, 2*SIZE(A_PTR) \ + MOVUPS X7, (A_PTR)(LDA*1) \ + MOVUPS X8, 
2*SIZE(A_PTR)(LDA*1) \ + ADDQ $4*SIZE, A_PTR + +#define KERNEL_2x2 \ + MOVUPS X5, X6 \ + MULPD X1, X5 \ + MULPD X2, X6 + +#define STORE_2x2 \ + MOVUPS (A_PTR), X7 \ + ADDPD X7, X5 \ + MOVUPS (A_PTR)(LDA*1), X8 \ + ADDPD X8, X6 \ + MOVUPS X5, (A_PTR) \ + MOVUPS X6, (A_PTR)(LDA*1) \ + ADDQ $2*SIZE, A_PTR + +#define KERNEL_2x1 \ + MOVSD (Y_PTR), X5 \ + MOVSD X5, X6 \ + MULSD X1, X5 \ + MULSD X2, X6 + +#define STORE_2x1 \ + ADDSD (A_PTR), X5 \ + ADDSD (A_PTR)(LDA*1), X6 \ + MOVSD X5, (A_PTR) \ + MOVSD X6, (A_PTR)(LDA*1) \ + ADDQ $SIZE, A_PTR + +#define KERNEL_1x4 \ + MULPD X1, X5 \ + MULPD X1, X6 + +#define STORE_1x4 \ + MOVUPS (A_PTR), X7 \ + ADDPD X7, X5 \ + MOVUPS 2*SIZE(A_PTR), X8 \ + ADDPD X8, X6 \ + MOVUPS X5, (A_PTR) \ + MOVUPS X6, 2*SIZE(A_PTR) \ + ADDQ $4*SIZE, A_PTR + +#define KERNEL_1x2 \ + MULPD X1, X5 + +#define STORE_1x2 \ + MOVUPS (A_PTR), X6 \ + ADDPD X6, X5 \ + MOVUPS X5, (A_PTR) \ + ADDQ $2*SIZE, A_PTR + +#define KERNEL_1x1 \ + MOVSD (Y_PTR), X5 \ + MULSD X1, X5 + +#define STORE_1x1 \ + ADDSD (A_PTR), X5 \ + MOVSD X5, (A_PTR) \ + ADDQ $SIZE, A_PTR + +// func Ger(m, n uintptr, alpha float64, +// x []float64, incX uintptr, +// y []float64, incY uintptr, +// a []float64, lda uintptr) +TEXT ·Ger(SB), NOSPLIT, $0 + MOVQ M_DIM, M + MOVQ N_DIM, N + CMPQ M, $0 + JE end + CMPQ N, $0 + JE end + + MOVDDUP alpha+16(FP), ALPHA + + MOVQ x_base+24(FP), X_PTR + MOVQ y_base+56(FP), Y_PTR + MOVQ a_base+88(FP), A_ROW + MOVQ incX+48(FP), INC_X // INC_X = incX * sizeof(float64) + SHLQ $3, INC_X + MOVQ lda+112(FP), LDA // LDA = LDA * sizeof(float64) + SHLQ $3, LDA + LEAQ (LDA)(LDA*2), LDA3 // LDA3 = LDA * 3 + LEAQ (INC_X)(INC_X*2), INC3_X // INC3_X = INC_X * 3 + MOVQ A_ROW, A_PTR + + XORQ TMP2, TMP2 + MOVQ M, TMP1 + SUBQ $1, TMP1 + IMULQ INC_X, TMP1 + NEGQ TMP1 + CMPQ INC_X, $0 + CMOVQLT TMP1, TMP2 + LEAQ (X_PTR)(TMP2*SIZE), X_PTR + + CMPQ incY+80(FP), $1 // Check for dense vector Y (fast-path) + JG inc + JL end + + SHRQ $2, M + JZ r2 + +r4: + // LOAD 4 + LOAD4 + + MOVQ N_DIM, N + SHRQ $2, N + JZ r4c2 + +r4c4: + // 4x4 KERNEL + KERNEL_LOAD4 + KERNEL_4x4 + STORE_4x4 + + ADDQ $4*SIZE, Y_PTR + + DECQ N + JNZ r4c4 + + // Reload ALPHA after it's clobbered by STORE_4x4 + MOVDDUP alpha+16(FP), ALPHA + +r4c2: + TESTQ $2, N_DIM + JZ r4c1 + + // 4x2 KERNEL + KERNEL_LOAD2 + KERNEL_4x2 + STORE_4x2 + + ADDQ $2*SIZE, Y_PTR + +r4c1: + TESTQ $1, N_DIM + JZ r4end + + // 4x1 KERNEL + KERNEL_4x1 + STORE_4x1 + + ADDQ $SIZE, Y_PTR + +r4end: + LEAQ (X_PTR)(INC_X*4), X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*4), A_ROW + MOVQ A_ROW, A_PTR + + DECQ M + JNZ r4 + +r2: + TESTQ $2, M_DIM + JZ r1 + + // LOAD 2 + LOAD2 + + MOVQ N_DIM, N + SHRQ $2, N + JZ r2c2 + +r2c4: + // 2x4 KERNEL + KERNEL_LOAD4 + KERNEL_2x4 + STORE_2x4 + + ADDQ $4*SIZE, Y_PTR + + DECQ N + JNZ r2c4 + +r2c2: + TESTQ $2, N_DIM + JZ r2c1 + + // 2x2 KERNEL + KERNEL_LOAD2 + KERNEL_2x2 + STORE_2x2 + + ADDQ $2*SIZE, Y_PTR + +r2c1: + TESTQ $1, N_DIM + JZ r2end + + // 2x1 KERNEL + KERNEL_2x1 + STORE_2x1 + + ADDQ $SIZE, Y_PTR + +r2end: + LEAQ (X_PTR)(INC_X*2), X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*2), A_ROW + MOVQ A_ROW, A_PTR + +r1: + TESTQ $1, M_DIM + JZ end + + // LOAD 1 + LOAD1 + + MOVQ N_DIM, N + SHRQ $2, N + JZ r1c2 + +r1c4: + // 1x4 KERNEL + KERNEL_LOAD4 + KERNEL_1x4 + STORE_1x4 + + ADDQ $4*SIZE, Y_PTR + + DECQ N + JNZ r1c4 + +r1c2: + TESTQ $2, N_DIM + JZ r1c1 + + // 1x2 KERNEL + KERNEL_LOAD2 + KERNEL_1x2 + STORE_1x2 + + ADDQ $2*SIZE, Y_PTR + +r1c1: + TESTQ $1, N_DIM + JZ end + + // 1x1 KERNEL + KERNEL_1x1 + STORE_1x1 + + ADDQ $SIZE, Y_PTR + +end: + RET + 
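+
+// When incY is negative, the prologue below first offsets Y_PTR to the
+// element of y with the highest address, so the negative-stride walks
+// in the kernels stay inside the vector.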
+inc: // Algorithm for incY != 1 ( split loads in kernel ) + + MOVQ incY+80(FP), INC_Y // INC_Y = incY * sizeof(float64) + SHLQ $3, INC_Y + LEAQ (INC_Y)(INC_Y*2), INC3_Y // INC3_Y = INC_Y * 3 + + XORQ TMP2, TMP2 + MOVQ N, TMP1 + SUBQ $1, TMP1 + IMULQ INC_Y, TMP1 + NEGQ TMP1 + CMPQ INC_Y, $0 + CMOVQLT TMP1, TMP2 + LEAQ (Y_PTR)(TMP2*SIZE), Y_PTR + + SHRQ $2, M + JZ inc_r2 + +inc_r4: + // LOAD 4 + LOAD4 + + MOVQ N_DIM, N + SHRQ $2, N + JZ inc_r4c2 + +inc_r4c4: + // 4x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_4x4 + STORE_4x4 + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + DECQ N + JNZ inc_r4c4 + + // Reload ALPHA after it's clobbered by STORE_4x4 + MOVDDUP alpha+16(FP), ALPHA + +inc_r4c2: + TESTQ $2, N_DIM + JZ inc_r4c1 + + // 4x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_4x2 + STORE_4x2 + + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_r4c1: + TESTQ $1, N_DIM + JZ inc_r4end + + // 4x1 KERNEL + KERNEL_4x1 + STORE_4x1 + + ADDQ INC_Y, Y_PTR + +inc_r4end: + LEAQ (X_PTR)(INC_X*4), X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*4), A_ROW + MOVQ A_ROW, A_PTR + + DECQ M + JNZ inc_r4 + +inc_r2: + TESTQ $2, M_DIM + JZ inc_r1 + + // LOAD 2 + LOAD2 + + MOVQ N_DIM, N + SHRQ $2, N + JZ inc_r2c2 + +inc_r2c4: + // 2x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_2x4 + STORE_2x4 + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + DECQ N + JNZ inc_r2c4 + +inc_r2c2: + TESTQ $2, N_DIM + JZ inc_r2c1 + + // 2x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_2x2 + STORE_2x2 + + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_r2c1: + TESTQ $1, N_DIM + JZ inc_r2end + + // 2x1 KERNEL + KERNEL_2x1 + STORE_2x1 + + ADDQ INC_Y, Y_PTR + +inc_r2end: + LEAQ (X_PTR)(INC_X*2), X_PTR + MOVQ Y, Y_PTR + LEAQ (A_ROW)(LDA*2), A_ROW + MOVQ A_ROW, A_PTR + +inc_r1: + TESTQ $1, M_DIM + JZ end + + // LOAD 1 + LOAD1 + + MOVQ N_DIM, N + SHRQ $2, N + JZ inc_r1c2 + +inc_r1c4: + // 1x4 KERNEL + KERNEL_LOAD4_INC + KERNEL_1x4 + STORE_1x4 + + LEAQ (Y_PTR)(INC_Y*4), Y_PTR + DECQ N + JNZ inc_r1c4 + +inc_r1c2: + TESTQ $2, N_DIM + JZ inc_r1c1 + + // 1x2 KERNEL + KERNEL_LOAD2_INC + KERNEL_1x2 + STORE_1x2 + + LEAQ (Y_PTR)(INC_Y*2), Y_PTR + +inc_r1c1: + TESTQ $1, N_DIM + JZ end + + // 1x1 KERNEL + KERNEL_1x1 + STORE_1x1 + + ADDQ INC_Y, Y_PTR + +inc_end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/l1norm_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/l1norm_amd64.s new file mode 100644 index 0000000000..b4b1fd02fb --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/l1norm_amd64.s @@ -0,0 +1,58 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
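+//
+// For review: the kernel below is the unrolled form of the reference
+// loop
+//
+//	for i, v := range s {
+//		norm += math.Abs(v - t[i])
+//	}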
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// func L1Dist(s, t []float64) float64 +TEXT ·L1Dist(SB), NOSPLIT, $0 + MOVQ s_base+0(FP), DI // DI = &s + MOVQ t_base+24(FP), SI // SI = &t + MOVQ s_len+8(FP), CX // CX = len(s) + CMPQ t_len+32(FP), CX // CX = max( CX, len(t) ) + CMOVQLE t_len+32(FP), CX + PXOR X3, X3 // norm = 0 + CMPQ CX, $0 // if CX == 0 { return 0 } + JE l1_end + XORQ AX, AX // i = 0 + MOVQ CX, BX + ANDQ $1, BX // BX = CX % 2 + SHRQ $1, CX // CX = floor( CX / 2 ) + JZ l1_tail_start // if CX == 0 { return 0 } + +l1_loop: // Loop unrolled 2x do { + MOVUPS (SI)(AX*8), X0 // X0 = t[i:i+1] + MOVUPS (DI)(AX*8), X1 // X1 = s[i:i+1] + MOVAPS X0, X2 + SUBPD X1, X0 + SUBPD X2, X1 + MAXPD X1, X0 // X0 = max( X0 - X1, X1 - X0 ) + ADDPD X0, X3 // norm += X0 + ADDQ $2, AX // i += 2 + LOOP l1_loop // } while --CX > 0 + CMPQ BX, $0 // if BX == 0 { return } + JE l1_end + +l1_tail_start: // Reset loop registers + MOVQ BX, CX // Loop counter: CX = BX + PXOR X0, X0 // reset X0, X1 to break dependencies + PXOR X1, X1 + +l1_tail: + MOVSD (SI)(AX*8), X0 // X0 = t[i] + MOVSD (DI)(AX*8), X1 // x1 = s[i] + MOVAPD X0, X2 + SUBSD X1, X0 + SUBSD X2, X1 + MAXSD X1, X0 // X0 = max( X0 - X1, X1 - X0 ) + ADDSD X0, X3 // norm += X0 + +l1_end: + MOVAPS X3, X2 + SHUFPD $1, X2, X2 + ADDSD X3, X2 // X2 = X3[1] + X3[0] + MOVSD X2, ret+48(FP) // return X2 + RET + diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/l2norm_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/l2norm_amd64.s new file mode 100644 index 0000000000..86e01c8701 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/l2norm_amd64.s @@ -0,0 +1,109 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define SUMSQ X0 +#define ABSX X1 +#define SCALE X2 +#define ZERO X3 +#define TMP X4 +#define ABSMASK X5 +#define INF X7 +#define INFMASK X11 +#define NANMASK X12 +#define IDX AX +#define LEN SI +#define X_ DI + +#define ABSMASK_DATA l2nrodata<>+0(SB) +#define INF_DATA l2nrodata<>+8(SB) +#define NAN_DATA l2nrodata<>+16(SB) +// AbsMask +DATA l2nrodata<>+0(SB)/8, $0x7FFFFFFFFFFFFFFF +// Inf +DATA l2nrodata<>+8(SB)/8, $0x7FF0000000000000 +// NaN +DATA l2nrodata<>+16(SB)/8, $0xFFF8000000000000 +GLOBL l2nrodata<>+0(SB), RODATA, $24 + +// L2NormUnitary returns the L2-norm of x. 
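+// It uses the same scale/sumSquares formulation as the pure-Go
+// L2NormUnitary in l2norm_noasm.go, so intermediate squares can
+// neither overflow nor underflow prematurely:
+// norm = scale * sqrt(sumSquares).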
+// func L2NormUnitary(x []float64) (norm float64) +TEXT ·L2NormUnitary(SB), NOSPLIT, $0 + MOVQ x_len+8(FP), LEN // LEN = len(x) + MOVQ x_base+0(FP), X_ + PXOR ZERO, ZERO + CMPQ LEN, $0 // if LEN == 0 { return 0 } + JZ retZero + + PXOR INFMASK, INFMASK + PXOR NANMASK, NANMASK + MOVSD $1.0, SUMSQ // ssq = 1 + XORPS SCALE, SCALE + MOVSD ABSMASK_DATA, ABSMASK + MOVSD INF_DATA, INF + XORQ IDX, IDX // idx == 0 + +initZero: // for ;x[i]==0; i++ {} + // Skip all leading zeros, to avoid divide by zero NaN + MOVSD (X_)(IDX*8), ABSX // absxi = x[i] + UCOMISD ABSX, ZERO + JP retNaN // if isNaN(x[i]) { return NaN } + JNE loop // if x[i] != 0 { goto loop } + INCQ IDX // i++ + CMPQ IDX, LEN + JE retZero // if i == LEN { return 0 } + JMP initZero + +loop: + MOVSD (X_)(IDX*8), ABSX // absxi = x[i] + MOVUPS ABSX, TMP + CMPSD ABSX, TMP, $3 + ORPD TMP, NANMASK // NANMASK = NANMASK | IsNaN(absxi) + MOVSD INF, TMP + ANDPD ABSMASK, ABSX // absxi == Abs(absxi) + CMPSD ABSX, TMP, $0 + ORPD TMP, INFMASK // INFMASK = INFMASK | IsInf(absxi) + UCOMISD SCALE, ABSX + JA adjScale // IF SCALE > ABSXI { goto adjScale } + + DIVSD SCALE, ABSX // absxi = scale / absxi + MULSD ABSX, ABSX // absxi *= absxi + ADDSD ABSX, SUMSQ // sumsq += absxi + INCQ IDX // i++ + CMPQ IDX, LEN + JNE loop // if i < LEN { continue } + JMP retSum // if i == LEN { goto retSum } + +adjScale: // Scale > Absxi + DIVSD ABSX, SCALE // tmp = absxi / scale + MULSD SCALE, SUMSQ // sumsq *= tmp + MULSD SCALE, SUMSQ // sumsq *= tmp + ADDSD $1.0, SUMSQ // sumsq += 1 + MOVUPS ABSX, SCALE // scale = absxi + INCQ IDX // i++ + CMPQ IDX, LEN + JNE loop // if i < LEN { continue } + +retSum: // Calculate return value + SQRTSD SUMSQ, SUMSQ // sumsq = sqrt(sumsq) + MULSD SCALE, SUMSQ // sumsq += scale + MOVQ SUMSQ, R10 // tmp = sumsq + UCOMISD ZERO, INFMASK + CMOVQPS INF_DATA, R10 // if INFMASK { tmp = INF } + UCOMISD ZERO, NANMASK + CMOVQPS NAN_DATA, R10 // if NANMASK { tmp = NaN } + MOVQ R10, norm+24(FP) // return tmp + RET + +retZero: + MOVSD ZERO, norm+24(FP) // return 0 + RET + +retNaN: + MOVSD NAN_DATA, TMP // return NaN + MOVSD TMP, norm+24(FP) + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/l2norm_noasm.go b/vendor/gonum.org/v1/gonum/internal/asm/f64/l2norm_noasm.go new file mode 100644 index 0000000000..bfb8fba981 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/l2norm_noasm.go @@ -0,0 +1,93 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !amd64 || noasm || gccgo || safe +// +build !amd64 noasm gccgo safe + +package f64 + +import "math" + +// L2NormUnitary returns the L2-norm of x. +func L2NormUnitary(x []float64) (norm float64) { + var scale float64 + sumSquares := 1.0 + for _, v := range x { + if v == 0 { + continue + } + absxi := math.Abs(v) + if math.IsNaN(absxi) { + return math.NaN() + } + if scale < absxi { + s := scale / absxi + sumSquares = 1 + sumSquares*s*s + scale = absxi + } else { + s := absxi / scale + sumSquares += s * s + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(sumSquares) +} + +// L2NormInc returns the L2-norm of x. 
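+// It visits x[0], x[incX], ..., x[(n-1)*incX], ignores zero elements,
+// and accumulates with the same overflow-safe scaling as
+// L2NormUnitary.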
+func L2NormInc(x []float64, n, incX uintptr) (norm float64) { + var scale float64 + sumSquares := 1.0 + for ix := uintptr(0); ix < n*incX; ix += incX { + val := x[ix] + if val == 0 { + continue + } + absxi := math.Abs(val) + if math.IsNaN(absxi) { + return math.NaN() + } + if scale < absxi { + s := scale / absxi + sumSquares = 1 + sumSquares*s*s + scale = absxi + } else { + s := absxi / scale + sumSquares += s * s + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(sumSquares) +} + +// L2DistanceUnitary returns the L2-norm of x-y. +func L2DistanceUnitary(x, y []float64) (norm float64) { + var scale float64 + sumSquares := 1.0 + for i, v := range x { + v -= y[i] + if v == 0 { + continue + } + absxi := math.Abs(v) + if math.IsNaN(absxi) { + return math.NaN() + } + if scale < absxi { + s := scale / absxi + sumSquares = 1 + sumSquares*s*s + scale = absxi + } else { + s := absxi / scale + sumSquares += s * s + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(sumSquares) +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/l2normdist_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/l2normdist_amd64.s new file mode 100644 index 0000000000..10dcae400e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/l2normdist_amd64.s @@ -0,0 +1,115 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define SUMSQ X0 +#define ABSX X1 +#define SCALE X2 +#define ZERO X3 +#define TMP X4 +#define ABSMASK X5 +#define INF X7 +#define INFMASK X11 +#define NANMASK X12 +#define IDX AX +#define X_ DI +#define Y_ BX +#define LEN SI + +#define ABSMASK_DATA l2nrodata<>+0(SB) +#define INF_DATA l2nrodata<>+8(SB) +#define NAN_DATA l2nrodata<>+16(SB) +// AbsMask +DATA l2nrodata<>+0(SB)/8, $0x7FFFFFFFFFFFFFFF +// Inf +DATA l2nrodata<>+8(SB)/8, $0x7FF0000000000000 +// NaN +DATA l2nrodata<>+16(SB)/8, $0xFFF8000000000000 +GLOBL l2nrodata<>+0(SB), RODATA, $24 + +// L2DistanceUnitary returns the L2-norm of x-y. 
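+// It applies the same scale/sumSquares scheme as L2NormUnitary to the
+// differences x[i]-y[i]; the Go reference is L2DistanceUnitary in
+// l2norm_noasm.go.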
+// func L2DistanceUnitary(x,y []float64) (norm float64)
+TEXT ·L2DistanceUnitary(SB), NOSPLIT, $0
+	MOVQ    x_base+0(FP), X_
+	MOVQ    y_base+24(FP), Y_
+	PXOR    ZERO, ZERO
+	MOVQ    x_len+8(FP), LEN // LEN = min( len(x), len(y) )
+	CMPQ    y_len+32(FP), LEN
+	CMOVQLE y_len+32(FP), LEN
+	CMPQ    LEN, $0 // if LEN == 0 { return 0 }
+	JZ      retZero
+
+	PXOR  INFMASK, INFMASK
+	PXOR  NANMASK, NANMASK
+	MOVSD $1.0, SUMSQ // ssq = 1
+	XORPS SCALE, SCALE
+	MOVSD ABSMASK_DATA, ABSMASK
+	MOVSD INF_DATA, INF
+	XORQ  IDX, IDX // idx == 0
+
+initZero: // for ;x[i]==0; i++ {}
+	// Skip all leading zeros, to avoid divide by zero NaN
+	MOVSD   (X_)(IDX*8), ABSX // absxi = x[i]
+	SUBSD   (Y_)(IDX*8), ABSX // absxi = x[i]-y[i]
+	UCOMISD ABSX, ZERO
+	JP      retNaN // if isNaN(absxi) { return NaN }
+	JNE     loop   // if absxi != 0 { goto loop }
+	INCQ    IDX    // i++
+	CMPQ    IDX, LEN
+	JE      retZero // if i == LEN { return 0 }
+	JMP     initZero
+
+loop:
+	MOVSD   (X_)(IDX*8), ABSX // absxi = x[i]
+	SUBSD   (Y_)(IDX*8), ABSX // absxi = x[i]-y[i]
+	MOVUPS  ABSX, TMP
+	CMPSD   ABSX, TMP, $3
+	ORPD    TMP, NANMASK // NANMASK = NANMASK | IsNaN(absxi)
+	MOVSD   INF, TMP
+	ANDPD   ABSMASK, ABSX // absxi = Abs(absxi)
+	CMPSD   ABSX, TMP, $0
+	ORPD    TMP, INFMASK // INFMASK = INFMASK | IsInf(absxi)
+	UCOMISD SCALE, ABSX
+	JA      adjScale // if ABSXI > SCALE { goto adjScale }
+
+	DIVSD SCALE, ABSX // absxi = absxi / scale
+	MULSD ABSX, ABSX  // absxi *= absxi
+	ADDSD ABSX, SUMSQ // sumsq += absxi
+	INCQ  IDX         // i++
+	CMPQ  IDX, LEN
+	JNE   loop   // if i < LEN { continue }
+	JMP   retSum // if i == LEN { goto retSum }
+
+adjScale: // Absxi > Scale
+	DIVSD  ABSX, SCALE  // tmp = scale / absxi
+	MULSD  SCALE, SUMSQ // sumsq *= tmp
+	MULSD  SCALE, SUMSQ // sumsq *= tmp
+	ADDSD  $1.0, SUMSQ  // sumsq += 1
+	MOVUPS ABSX, SCALE  // scale = absxi
+	INCQ   IDX          // i++
+	CMPQ   IDX, LEN
+	JNE    loop // if i < LEN { continue }
+
+retSum: // Calculate return value
+	SQRTSD  SUMSQ, SUMSQ // sumsq = sqrt(sumsq)
+	MULSD   SCALE, SUMSQ // sumsq *= scale
+	MOVQ    SUMSQ, R10   // tmp = sumsq
+	UCOMISD ZERO, INFMASK
+	CMOVQPS INF_DATA, R10 // if INFMASK { tmp = INF }
+	UCOMISD ZERO, NANMASK
+	CMOVQPS NAN_DATA, R10 // if NANMASK { tmp = NaN }
+	MOVQ    R10, norm+48(FP) // return tmp
+	RET
+
+retZero:
+	MOVSD ZERO, norm+48(FP) // return 0
+	RET
+
+retNaN:
+	MOVSD NAN_DATA, TMP // return NaN
+	MOVSD TMP, norm+48(FP)
+	RET
diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/l2norminc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/l2norminc_amd64.s
new file mode 100644
index 0000000000..8341db93ac
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/l2norminc_amd64.s
@@ -0,0 +1,110 @@
+// Copyright ©2019 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !noasm,!gccgo,!safe
+
+#include "textflag.h"
+
+#define SUMSQ X0
+#define ABSX X1
+#define SCALE X2
+#define ZERO X3
+#define TMP X4
+#define ABSMASK X5
+#define INF X7
+#define INFMASK X11
+#define NANMASK X12
+#define IDX AX
+#define LEN SI
+#define INC BX
+#define X_ DI
+
+#define ABSMASK_DATA l2nrodata<>+0(SB)
+#define INF_DATA l2nrodata<>+8(SB)
+#define NAN_DATA l2nrodata<>+16(SB)
+// AbsMask
+DATA l2nrodata<>+0(SB)/8, $0x7FFFFFFFFFFFFFFF
+// Inf
+DATA l2nrodata<>+8(SB)/8, $0x7FF0000000000000
+// NaN
+DATA l2nrodata<>+16(SB)/8, $0xFFF8000000000000
+GLOBL l2nrodata<>+0(SB), RODATA, $24
+
+// func L2NormInc(x []float64, n, incX uintptr) (norm float64)
+TEXT ·L2NormInc(SB), NOSPLIT, $0
+	MOVQ  n+24(FP), LEN // LEN = n
+	MOVQ  incX+32(FP), INC
+	MOVQ  x_base+0(FP), X_
+	XORPS ZERO, ZERO
+	CMPQ  LEN, $0 // if LEN == 0 { return 0 }
+	JZ    retZero
+
+	XORPS INFMASK, INFMASK
+	XORPS NANMASK, NANMASK
+	MOVSD $1.0, SUMSQ // ssq = 1
+	XORPS SCALE, SCALE
+	MOVSD ABSMASK_DATA, ABSMASK
+	MOVSD INF_DATA, INF
+	SHLQ  $3, INC // INC *= sizeof(float64)
+
+initZero: // for ;x[i]==0; i++ {}
+	// Skip all leading zeros, to avoid divide by zero NaN
+	MOVSD   (X_), ABSX // absxi = x[i]
+	UCOMISD ABSX, ZERO
+	JP      retNaN  // if isNaN(x[i]) { return NaN }
+	JNZ     loop    // if x[i] != 0 { goto loop }
+	ADDQ    INC, X_ // i += INC
+	DECQ    LEN     // LEN--
+	JZ      retZero // if LEN == 0 { return 0 }
+	JMP     initZero
+
+loop:
+	MOVSD   (X_), ABSX // absxi = x[i]
+	MOVUPS  ABSX, TMP
+	CMPSD   ABSX, TMP, $3
+	ORPD    TMP, NANMASK // NANMASK = NANMASK | IsNaN(absxi)
+	MOVSD   INF, TMP
+	ANDPD   ABSMASK, ABSX // absxi = Abs(absxi)
+	CMPSD   ABSX, TMP, $0
+	ORPD    TMP, INFMASK // INFMASK = INFMASK | IsInf(absxi)
+	UCOMISD SCALE, ABSX
+	JA      adjScale // if ABSXI > SCALE { goto adjScale }
+
+	DIVSD SCALE, ABSX // absxi = absxi / scale
+	MULSD ABSX, ABSX  // absxi *= absxi
+	ADDSD ABSX, SUMSQ // sumsq += absxi
+	ADDQ  INC, X_     // i += INC
+	DECQ  LEN         // LEN--
+	JNZ   loop   // if LEN > 0 { continue }
+	JMP   retSum // if LEN == 0 { goto retSum }
+
+adjScale: // Absxi > Scale
+	DIVSD  ABSX, SCALE  // tmp = scale / absxi
+	MULSD  SCALE, SUMSQ // sumsq *= tmp
+	MULSD  SCALE, SUMSQ // sumsq *= tmp
+	ADDSD  $1.0, SUMSQ  // sumsq += 1
+	MOVUPS ABSX, SCALE  // scale = absxi
+	ADDQ   INC, X_      // i += INC
+	DECQ   LEN          // LEN--
+	JNZ    loop // if LEN > 0 { continue }
+
+retSum: // Calculate return value
+	SQRTSD  SUMSQ, SUMSQ // sumsq = sqrt(sumsq)
+	MULSD   SCALE, SUMSQ // sumsq *= scale
+	MOVQ    SUMSQ, R10   // tmp = sumsq
+	UCOMISD ZERO, INFMASK
+	CMOVQPS INF_DATA, R10 // if INFMASK { tmp = INF }
+	UCOMISD ZERO, NANMASK
+	CMOVQPS NAN_DATA, R10 // if NANMASK { tmp = NaN }
+	MOVQ    R10, norm+40(FP) // return tmp
+	RET
+
+retZero:
+	MOVSD ZERO, norm+40(FP) // return 0
+	RET
+
+retNaN:
+	MOVSD NAN_DATA, TMP // return NaN
+	MOVSD TMP, norm+40(FP)
+	RET
diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/linfnorm_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/linfnorm_amd64.s
new file mode 100644
index 0000000000..ac18b481de
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/linfnorm_amd64.s
@@ -0,0 +1,57 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !noasm,!gccgo,!safe
+
+#include "textflag.h"
+
+// func LinfDist(s, t []float64) float64
+TEXT ·LinfDist(SB), NOSPLIT, $0
+	MOVQ    s_base+0(FP), DI  // DI = &s
+	MOVQ    t_base+24(FP), SI // SI = &t
+	MOVQ    s_len+8(FP), CX   // CX = len(s)
+	CMPQ    t_len+32(FP), CX  // CX = min( CX, len(t) )
+	CMOVQLE t_len+32(FP), CX
+	PXOR    X3, X3 // norm = 0
+	CMPQ    CX, $0 // if CX == 0 { return 0 }
+	JE      l1_end
+	XORQ    AX, AX // i = 0
+	MOVQ    CX, BX
+	ANDQ    $1, BX        // BX = CX % 2
+	SHRQ    $1, CX        // CX = floor( CX / 2 )
+	JZ      l1_tail_start // if CX == 0 { goto l1_tail_start }
+
+l1_loop: // Loop unrolled 2x do {
+	MOVUPS (SI)(AX*8), X0 // X0 = t[i:i+1]
+	MOVUPS (DI)(AX*8), X1 // X1 = s[i:i+1]
+	MOVAPS X0, X2
+	SUBPD  X1, X0
+	SUBPD  X2, X1
+	MAXPD  X1, X0  // X0 = max( X0 - X1, X1 - X0 )
+	MAXPD  X0, X3  // norm = max( norm, X0 )
+	ADDQ   $2, AX  // i += 2
+	LOOP   l1_loop // } while --CX > 0
+	CMPQ   BX, $0  // if BX == 0 { return }
+	JE     l1_end
+
+l1_tail_start: // Reset loop registers
+	MOVQ BX, CX // Loop counter: CX = BX
+	PXOR X0, X0 // reset X0, X1 to break dependencies
+	PXOR X1, X1
+
+l1_tail:
+	MOVSD  (SI)(AX*8), X0 // X0 = t[i]
+	MOVSD  (DI)(AX*8), X1 // X1 = s[i]
+	MOVAPD X0, X2
+	SUBSD  X1, X0
+	SUBSD  X2, X1
+	MAXSD  X1, X0 // X0 = max( X0 - X1, X1 - X0 )
+	MAXSD  X0, X3 // norm = max( norm, X0 )
+
+l1_end:
+	MOVAPS X3, X2
+	SHUFPD $1, X2, X2
+	MAXSD  X3, X2         // X2 = max( X3[1], X3[0] )
+	MOVSD  X2, ret+48(FP) // return X2
+	RET
diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/scal.go b/vendor/gonum.org/v1/gonum/internal/asm/f64/scal.go
new file mode 100644
index 0000000000..c95219e18a
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/scal.go
@@ -0,0 +1,62 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 || noasm || gccgo || safe
+// +build !amd64 noasm gccgo safe
+
+package f64
+
+// ScalUnitary is
+//
+//	for i := range x {
+//		x[i] *= alpha
+//	}
+func ScalUnitary(alpha float64, x []float64) {
+	for i := range x {
+		x[i] *= alpha
+	}
+}
+
+// ScalUnitaryTo is
+//
+//	for i, v := range x {
+//		dst[i] = alpha * v
+//	}
+func ScalUnitaryTo(dst []float64, alpha float64, x []float64) {
+	for i, v := range x {
+		dst[i] = alpha * v
+	}
+}
+
+// ScalInc is
+//
+//	var ix uintptr
+//	for i := 0; i < int(n); i++ {
+//		x[ix] *= alpha
+//		ix += incX
+//	}
func ScalInc(alpha float64, x []float64, n, incX uintptr) {
+	var ix uintptr
+	for i := 0; i < int(n); i++ {
+		x[ix] *= alpha
+		ix += incX
+	}
+}
+
+// ScalIncTo is
+//
+//	var idst, ix uintptr
+//	for i := 0; i < int(n); i++ {
+//		dst[idst] = alpha * x[ix]
+//		ix += incX
+//		idst += incDst
+//	}
+func ScalIncTo(dst []float64, incDst uintptr, alpha float64, x []float64, n, incX uintptr) {
+	var idst, ix uintptr
+	for i := 0; i < int(n); i++ {
+		dst[idst] = alpha * x[ix]
+		ix += incX
+		idst += incDst
+	}
+}
diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/scalinc_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalinc_amd64.s
new file mode 100644
index 0000000000..d623a284f9
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalinc_amd64.s
@@ -0,0 +1,113 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// +// Some of the loop unrolling code is copied from: +// http://golang.org/src/math/big/arith_amd64.s +// which is distributed under these terms: +// +// Copyright (c) 2012 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define X_PTR SI +#define LEN CX +#define TAIL BX +#define INC_X R8 +#define INCx3_X R9 +#define ALPHA X0 +#define ALPHA_2 X1 + +// func ScalInc(alpha float64, x []float64, n, incX uintptr) +TEXT ·ScalInc(SB), NOSPLIT, $0 + MOVSD alpha+0(FP), ALPHA // ALPHA = alpha + MOVQ x_base+8(FP), X_PTR // X_PTR = &x + MOVQ incX+40(FP), INC_X // INC_X = incX + SHLQ $3, INC_X // INC_X *= sizeof(float64) + MOVQ n+32(FP), LEN // LEN = n + CMPQ LEN, $0 + JE end // if LEN == 0 { return } + + MOVQ LEN, TAIL + ANDQ $3, TAIL // TAIL = LEN % 4 + SHRQ $2, LEN // LEN = floor( LEN / 4 ) + JZ tail_start // if LEN == 0 { goto tail_start } + + MOVUPS ALPHA, ALPHA_2 // ALPHA_2 = ALPHA for pipelining + LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3 + +loop: // do { // x[i] *= alpha unrolled 4x. 
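+	// The four MULSD chains below are independent, and ALPHA_2 holds a
+	// copy of ALPHA (loaded above "for pipelining") so consecutive
+	// multiplies do not all serialize on one register.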
+ MOVSD (X_PTR), X2 // X_i = x[i] + MOVSD (X_PTR)(INC_X*1), X3 + MOVSD (X_PTR)(INC_X*2), X4 + MOVSD (X_PTR)(INCx3_X*1), X5 + + MULSD ALPHA, X2 // X_i *= a + MULSD ALPHA_2, X3 + MULSD ALPHA, X4 + MULSD ALPHA_2, X5 + + MOVSD X2, (X_PTR) // x[i] = X_i + MOVSD X3, (X_PTR)(INC_X*1) + MOVSD X4, (X_PTR)(INC_X*2) + MOVSD X5, (X_PTR)(INCx3_X*1) + + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4]) + DECQ LEN + JNZ loop // } while --LEN > 0 + CMPQ TAIL, $0 + JE end // if TAIL == 0 { return } + +tail_start: // Reset loop registers + MOVQ TAIL, LEN // Loop counter: LEN = TAIL + SHRQ $1, LEN // LEN = floor( LEN / 2 ) + JZ tail_one + +tail_two: // do { + MOVSD (X_PTR), X2 // X_i = x[i] + MOVSD (X_PTR)(INC_X*1), X3 + MULSD ALPHA, X2 // X_i *= a + MULSD ALPHA, X3 + MOVSD X2, (X_PTR) // x[i] = X_i + MOVSD X3, (X_PTR)(INC_X*1) + + LEAQ (X_PTR)(INC_X*2), X_PTR // X_PTR = &(X_PTR[incX*2]) + + ANDQ $1, TAIL + JZ end + +tail_one: + MOVSD (X_PTR), X2 // X_i = x[i] + MULSD ALPHA, X2 // X_i *= ALPHA + MOVSD X2, (X_PTR) // x[i] = X_i + +end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/scalincto_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalincto_amd64.s new file mode 100644 index 0000000000..1c2722098d --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalincto_amd64.s @@ -0,0 +1,122 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// Some of the loop unrolling code is copied from: +// http://golang.org/src/math/big/arith_amd64.s +// which is distributed under these terms: +// +// Copyright (c) 2012 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define X_PTR SI +#define DST_PTR DI +#define LEN CX +#define TAIL BX +#define INC_X R8 +#define INCx3_X R9 +#define INC_DST R10 +#define INCx3_DST R11 +#define ALPHA X0 +#define ALPHA_2 X1 + +// func ScalIncTo(dst []float64, incDst uintptr, alpha float64, x []float64, n, incX uintptr) +TEXT ·ScalIncTo(SB), NOSPLIT, $0 + MOVQ dst_base+0(FP), DST_PTR // DST_PTR = &dst + MOVQ incDst+24(FP), INC_DST // INC_DST = incDst + SHLQ $3, INC_DST // INC_DST *= sizeof(float64) + MOVSD alpha+32(FP), ALPHA // ALPHA = alpha + MOVQ x_base+40(FP), X_PTR // X_PTR = &x + MOVQ n+64(FP), LEN // LEN = n + MOVQ incX+72(FP), INC_X // INC_X = incX + SHLQ $3, INC_X // INC_X *= sizeof(float64) + CMPQ LEN, $0 + JE end // if LEN == 0 { return } + + MOVQ LEN, TAIL + ANDQ $3, TAIL // TAIL = LEN % 4 + SHRQ $2, LEN // LEN = floor( LEN / 4 ) + JZ tail_start // if LEN == 0 { goto tail_start } + + MOVUPS ALPHA, ALPHA_2 // ALPHA_2 = ALPHA for pipelining + LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3 + LEAQ (INC_DST)(INC_DST*2), INCx3_DST // INCx3_DST = INC_DST * 3 + +loop: // do { // x[i] *= alpha unrolled 4x. + MOVSD (X_PTR), X2 // X_i = x[i] + MOVSD (X_PTR)(INC_X*1), X3 + MOVSD (X_PTR)(INC_X*2), X4 + MOVSD (X_PTR)(INCx3_X*1), X5 + + MULSD ALPHA, X2 // X_i *= a + MULSD ALPHA_2, X3 + MULSD ALPHA, X4 + MULSD ALPHA_2, X5 + + MOVSD X2, (DST_PTR) // dst[i] = X_i + MOVSD X3, (DST_PTR)(INC_DST*1) + MOVSD X4, (DST_PTR)(INC_DST*2) + MOVSD X5, (DST_PTR)(INCx3_DST*1) + + LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4]) + LEAQ (DST_PTR)(INC_DST*4), DST_PTR // DST_PTR = &(DST_PTR[incDst*4]) + DECQ LEN + JNZ loop // } while --LEN > 0 + CMPQ TAIL, $0 + JE end // if TAIL == 0 { return } + +tail_start: // Reset loop registers + MOVQ TAIL, LEN // Loop counter: LEN = TAIL + SHRQ $1, LEN // LEN = floor( LEN / 2 ) + JZ tail_one + +tail_two: + MOVSD (X_PTR), X2 // X_i = x[i] + MOVSD (X_PTR)(INC_X*1), X3 + MULSD ALPHA, X2 // X_i *= a + MULSD ALPHA, X3 + MOVSD X2, (DST_PTR) // dst[i] = X_i + MOVSD X3, (DST_PTR)(INC_DST*1) + + LEAQ (X_PTR)(INC_X*2), X_PTR // X_PTR = &(X_PTR[incX*2]) + LEAQ (DST_PTR)(INC_DST*2), DST_PTR // DST_PTR = &(DST_PTR[incDst*2]) + + ANDQ $1, TAIL + JZ end + +tail_one: + MOVSD (X_PTR), X2 // X_i = x[i] + MULSD ALPHA, X2 // X_i *= ALPHA + MOVSD X2, (DST_PTR) // x[i] = X_i + +end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitary_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitary_amd64.s new file mode 100644 index 0000000000..6e8f5ca6e1 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitary_amd64.s @@ -0,0 +1,112 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// Some of the loop unrolling code is copied from: +// http://golang.org/src/math/big/arith_amd64.s +// which is distributed under these terms: +// +// Copyright (c) 2012 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // @ MOVDDUP XMM0, 8[RSP] + +#define X_PTR SI +#define DST_PTR DI +#define IDX AX +#define LEN CX +#define TAIL BX +#define ALPHA X0 +#define ALPHA_2 X1 + +// func ScalUnitary(alpha float64, x []float64) +TEXT ·ScalUnitary(SB), NOSPLIT, $0 + MOVDDUP_ALPHA // ALPHA = { alpha, alpha } + MOVQ x_base+8(FP), X_PTR // X_PTR = &x + MOVQ x_len+16(FP), LEN // LEN = len(x) + CMPQ LEN, $0 + JE end // if LEN == 0 { return } + XORQ IDX, IDX // IDX = 0 + + MOVQ LEN, TAIL + ANDQ $7, TAIL // TAIL = LEN % 8 + SHRQ $3, LEN // LEN = floor( LEN / 8 ) + JZ tail_start // if LEN == 0 { goto tail_start } + + MOVUPS ALPHA, ALPHA_2 + +loop: // do { // x[i] *= alpha unrolled 8x. + MOVUPS (X_PTR)(IDX*8), X2 // X_i = x[i] + MOVUPS 16(X_PTR)(IDX*8), X3 + MOVUPS 32(X_PTR)(IDX*8), X4 + MOVUPS 48(X_PTR)(IDX*8), X5 + + MULPD ALPHA, X2 // X_i *= ALPHA + MULPD ALPHA_2, X3 + MULPD ALPHA, X4 + MULPD ALPHA_2, X5 + + MOVUPS X2, (X_PTR)(IDX*8) // x[i] = X_i + MOVUPS X3, 16(X_PTR)(IDX*8) + MOVUPS X4, 32(X_PTR)(IDX*8) + MOVUPS X5, 48(X_PTR)(IDX*8) + + ADDQ $8, IDX // i += 8 + DECQ LEN + JNZ loop // while --LEN > 0 + CMPQ TAIL, $0 + JE end // if TAIL == 0 { return } + +tail_start: // Reset loop registers + MOVQ TAIL, LEN // Loop counter: LEN = TAIL + SHRQ $1, LEN // LEN = floor( TAIL / 2 ) + JZ tail_one // if n == 0 goto end + +tail_two: // do { + MOVUPS (X_PTR)(IDX*8), X2 // X_i = x[i] + MULPD ALPHA, X2 // X_i *= ALPHA + MOVUPS X2, (X_PTR)(IDX*8) // x[i] = X_i + ADDQ $2, IDX // i += 2 + DECQ LEN + JNZ tail_two // while --LEN > 0 + + ANDQ $1, TAIL + JZ end // if TAIL == 0 { return } + +tail_one: + // x[i] *= alpha for the remaining element. + MOVSD (X_PTR)(IDX*8), X2 + MULSD ALPHA, X2 + MOVSD X2, (X_PTR)(IDX*8) + +end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitaryto_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitaryto_amd64.s new file mode 100644 index 0000000000..986480a5be --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitaryto_amd64.s @@ -0,0 +1,113 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// Some of the loop unrolling code is copied from: +// http://golang.org/src/math/big/arith_amd64.s +// which is distributed under these terms: +// +// Copyright (c) 2012 The Go Authors. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x2024 // @ MOVDDUP 32(SP), X0 /*XMM0, 32[RSP]*/ + +#define X_PTR SI +#define DST_PTR DI +#define IDX AX +#define LEN CX +#define TAIL BX +#define ALPHA X0 +#define ALPHA_2 X1 + +// func ScalUnitaryTo(dst []float64, alpha float64, x []float64) +// This function assumes len(dst) >= len(x). +TEXT ·ScalUnitaryTo(SB), NOSPLIT, $0 + MOVQ x_base+32(FP), X_PTR // X_PTR = &x + MOVQ dst_base+0(FP), DST_PTR // DST_PTR = &dst + MOVDDUP_ALPHA // ALPHA = { alpha, alpha } + MOVQ x_len+40(FP), LEN // LEN = len(x) + CMPQ LEN, $0 + JE end // if LEN == 0 { return } + + XORQ IDX, IDX // IDX = 0 + MOVQ LEN, TAIL + ANDQ $7, TAIL // TAIL = LEN % 8 + SHRQ $3, LEN // LEN = floor( LEN / 8 ) + JZ tail_start // if LEN == 0 { goto tail_start } + + MOVUPS ALPHA, ALPHA_2 // ALPHA_2 = ALPHA for pipelining + +loop: // do { // dst[i] = alpha * x[i] unrolled 8x. 
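+	// Each MOVUPS below moves two packed float64s, so four
+	// load/multiply/store triples cover eight elements per pass.
+	// Unaligned moves are used because float64 slice data is only
+	// guaranteed 8-byte, not 16-byte, alignment.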
+ MOVUPS (X_PTR)(IDX*8), X2 // X_i = x[i] + MOVUPS 16(X_PTR)(IDX*8), X3 + MOVUPS 32(X_PTR)(IDX*8), X4 + MOVUPS 48(X_PTR)(IDX*8), X5 + + MULPD ALPHA, X2 // X_i *= ALPHA + MULPD ALPHA_2, X3 + MULPD ALPHA, X4 + MULPD ALPHA_2, X5 + + MOVUPS X2, (DST_PTR)(IDX*8) // dst[i] = X_i + MOVUPS X3, 16(DST_PTR)(IDX*8) + MOVUPS X4, 32(DST_PTR)(IDX*8) + MOVUPS X5, 48(DST_PTR)(IDX*8) + + ADDQ $8, IDX // i += 8 + DECQ LEN + JNZ loop // while --LEN > 0 + CMPQ TAIL, $0 + JE end // if TAIL == 0 { return } + +tail_start: // Reset loop counters + MOVQ TAIL, LEN // Loop counter: LEN = TAIL + SHRQ $1, LEN // LEN = floor( TAIL / 2 ) + JZ tail_one // if LEN == 0 { goto tail_one } + +tail_two: // do { + MOVUPS (X_PTR)(IDX*8), X2 // X_i = x[i] + MULPD ALPHA, X2 // X_i *= ALPHA + MOVUPS X2, (DST_PTR)(IDX*8) // dst[i] = X_i + ADDQ $2, IDX // i += 2 + DECQ LEN + JNZ tail_two // while --LEN > 0 + + ANDQ $1, TAIL + JZ end // if TAIL == 0 { return } + +tail_one: + MOVSD (X_PTR)(IDX*8), X2 // X_i = x[i] + MULSD ALPHA, X2 // X_i *= ALPHA + MOVSD X2, (DST_PTR)(IDX*8) // dst[i] = X_i + +end: + RET diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_amd64.go b/vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_amd64.go new file mode 100644 index 0000000000..7139bedd74 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_amd64.go @@ -0,0 +1,277 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !noasm && !gccgo && !safe +// +build !noasm,!gccgo,!safe + +package f64 + +// L1Norm is +// +// for _, v := range x { +// sum += math.Abs(v) +// } +// return sum +func L1Norm(x []float64) (sum float64) + +// L1NormInc is +// +// for i := 0; i < n*incX; i += incX { +// sum += math.Abs(x[i]) +// } +// return sum +func L1NormInc(x []float64, n, incX int) (sum float64) + +// AddConst is +// +// for i := range x { +// x[i] += alpha +// } +func AddConst(alpha float64, x []float64) + +// Add is +// +// for i, v := range s { +// dst[i] += v +// } +func Add(dst, s []float64) + +// AxpyUnitary is +// +// for i, v := range x { +// y[i] += alpha * v +// } +func AxpyUnitary(alpha float64, x, y []float64) + +// AxpyUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha*v + y[i] +// } +func AxpyUnitaryTo(dst []float64, alpha float64, x, y []float64) + +// AxpyInc is +// +// for i := 0; i < int(n); i++ { +// y[iy] += alpha * x[ix] +// ix += incX +// iy += incY +// } +func AxpyInc(alpha float64, x, y []float64, n, incX, incY, ix, iy uintptr) + +// AxpyIncTo is +// +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha*x[ix] + y[iy] +// ix += incX +// iy += incY +// idst += incDst +// } +func AxpyIncTo(dst []float64, incDst, idst uintptr, alpha float64, x, y []float64, n, incX, incY, ix, iy uintptr) + +// CumSum is +// +// if len(s) == 0 { +// return dst +// } +// dst[0] = s[0] +// for i, v := range s[1:] { +// dst[i+1] = dst[i] + v +// } +// return dst +func CumSum(dst, s []float64) []float64 + +// CumProd is +// +// if len(s) == 0 { +// return dst +// } +// dst[0] = s[0] +// for i, v := range s[1:] { +// dst[i+1] = dst[i] * v +// } +// return dst +func CumProd(dst, s []float64) []float64 + +// Div is +// +// for i, v := range s { +// dst[i] /= v +// } +func Div(dst, s []float64) + +// DivTo is +// +// for i, v := range s { +// dst[i] = v / t[i] +// } +// return dst +func DivTo(dst, x, y []float64) []float64 + +// DotUnitary is +// +// for i, v := range x { +// sum += y[i] * v +// } +// return 
sum +func DotUnitary(x, y []float64) (sum float64) + +// DotInc is +// +// for i := 0; i < int(n); i++ { +// sum += y[iy] * x[ix] +// ix += incX +// iy += incY +// } +// return sum +func DotInc(x, y []float64, n, incX, incY, ix, iy uintptr) (sum float64) + +// L1Dist is +// +// var norm float64 +// for i, v := range s { +// norm += math.Abs(t[i] - v) +// } +// return norm +func L1Dist(s, t []float64) float64 + +// LinfDist is +// +// var norm float64 +// if len(s) == 0 { +// return 0 +// } +// norm = math.Abs(t[0] - s[0]) +// for i, v := range s[1:] { +// absDiff := math.Abs(t[i+1] - v) +// if absDiff > norm || math.IsNaN(norm) { +// norm = absDiff +// } +// } +// return norm +func LinfDist(s, t []float64) float64 + +// ScalUnitary is +// +// for i := range x { +// x[i] *= alpha +// } +func ScalUnitary(alpha float64, x []float64) + +// ScalUnitaryTo is +// +// for i, v := range x { +// dst[i] = alpha * v +// } +func ScalUnitaryTo(dst []float64, alpha float64, x []float64) + +// ScalInc is +// +// var ix uintptr +// for i := 0; i < int(n); i++ { +// x[ix] *= alpha +// ix += incX +// } +func ScalInc(alpha float64, x []float64, n, incX uintptr) + +// ScalIncTo is +// +// var idst, ix uintptr +// for i := 0; i < int(n); i++ { +// dst[idst] = alpha * x[ix] +// ix += incX +// idst += incDst +// } +func ScalIncTo(dst []float64, incDst uintptr, alpha float64, x []float64, n, incX uintptr) + +// Sum is +// +// var sum float64 +// for i := range x { +// sum += x[i] +// } +func Sum(x []float64) float64 + +// L2NormUnitary returns the L2-norm of x. +// +// var scale float64 +// sumSquares := 1.0 +// for _, v := range x { +// if v == 0 { +// continue +// } +// absxi := math.Abs(v) +// if math.IsNaN(absxi) { +// return math.NaN() +// } +// if scale < absxi { +// s := scale / absxi +// sumSquares = 1 + sumSquares*s*s +// scale = absxi +// } else { +// s := absxi / scale +// sumSquares += s * s +// } +// if math.IsInf(scale, 1) { +// return math.Inf(1) +// } +// } +// return scale * math.Sqrt(sumSquares) +func L2NormUnitary(x []float64) (norm float64) + +// L2NormInc returns the L2-norm of x. +// +// var scale float64 +// sumSquares := 1.0 +// for ix := uintptr(0); ix < n*incX; ix += incX { +// val := x[ix] +// if val == 0 { +// continue +// } +// absxi := math.Abs(val) +// if math.IsNaN(absxi) { +// return math.NaN() +// } +// if scale < absxi { +// s := scale / absxi +// sumSquares = 1 + sumSquares*s*s +// scale = absxi +// } else { +// s := absxi / scale +// sumSquares += s * s +// } +// } +// if math.IsInf(scale, 1) { +// return math.Inf(1) +// } +// return scale * math.Sqrt(sumSquares) +func L2NormInc(x []float64, n, incX uintptr) (norm float64) + +// L2DistanceUnitary returns the L2-norm of x-y. 
+// +// var scale float64 +// sumSquares := 1.0 +// for i, v := range x { +// v -= y[i] +// if v == 0 { +// continue +// } +// absxi := math.Abs(v) +// if math.IsNaN(absxi) { +// return math.NaN() +// } +// if scale < absxi { +// s := scale / absxi +// sumSquares = 1 + sumSquares*s*s +// scale = absxi +// } else { +// s := absxi / scale +// sumSquares += s * s +// } +// } +// if math.IsInf(scale, 1) { +// return math.Inf(1) +// } +// return scale * math.Sqrt(sumSquares) +func L2DistanceUnitary(x, y []float64) (norm float64) diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_noasm.go b/vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_noasm.go new file mode 100644 index 0000000000..f066379191 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_noasm.go @@ -0,0 +1,182 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !amd64 || noasm || gccgo || safe +// +build !amd64 noasm gccgo safe + +package f64 + +import "math" + +// L1Norm is +// +// for _, v := range x { +// sum += math.Abs(v) +// } +// return sum +func L1Norm(x []float64) (sum float64) { + for _, v := range x { + sum += math.Abs(v) + } + return sum +} + +// L1NormInc is +// +// for i := 0; i < n*incX; i += incX { +// sum += math.Abs(x[i]) +// } +// return sum +func L1NormInc(x []float64, n, incX int) (sum float64) { + for i := 0; i < n*incX; i += incX { + sum += math.Abs(x[i]) + } + return sum +} + +// Add is +// +// for i, v := range s { +// dst[i] += v +// } +func Add(dst, s []float64) { + for i, v := range s { + dst[i] += v + } +} + +// AddConst is +// +// for i := range x { +// x[i] += alpha +// } +func AddConst(alpha float64, x []float64) { + for i := range x { + x[i] += alpha + } +} + +// CumSum is +// +// if len(s) == 0 { +// return dst +// } +// dst[0] = s[0] +// for i, v := range s[1:] { +// dst[i+1] = dst[i] + v +// } +// return dst +func CumSum(dst, s []float64) []float64 { + if len(s) == 0 { + return dst + } + dst[0] = s[0] + for i, v := range s[1:] { + dst[i+1] = dst[i] + v + } + return dst +} + +// CumProd is +// +// if len(s) == 0 { +// return dst +// } +// dst[0] = s[0] +// for i, v := range s[1:] { +// dst[i+1] = dst[i] * v +// } +// return dst +func CumProd(dst, s []float64) []float64 { + if len(s) == 0 { + return dst + } + dst[0] = s[0] + for i, v := range s[1:] { + dst[i+1] = dst[i] * v + } + return dst +} + +// Div is +// +// for i, v := range s { +// dst[i] /= v +// } +func Div(dst, s []float64) { + for i, v := range s { + dst[i] /= v + } +} + +// DivTo is +// +// for i, v := range s { +// dst[i] = v / t[i] +// } +// return dst +func DivTo(dst, s, t []float64) []float64 { + for i, v := range s { + dst[i] = v / t[i] + } + return dst +} + +// L1Dist is +// +// var norm float64 +// for i, v := range s { +// norm += math.Abs(t[i] - v) +// } +// return norm +func L1Dist(s, t []float64) float64 { + var norm float64 + for i, v := range s { + norm += math.Abs(t[i] - v) + } + return norm +} + +// LinfDist is +// +// var norm float64 +// if len(s) == 0 { +// return 0 +// } +// norm = math.Abs(t[0] - s[0]) +// for i, v := range s[1:] { +// absDiff := math.Abs(t[i+1] - v) +// if absDiff > norm || math.IsNaN(norm) { +// norm = absDiff +// } +// } +// return norm +func LinfDist(s, t []float64) float64 { + var norm float64 + if len(s) == 0 { + return 0 + } + norm = math.Abs(t[0] - s[0]) + for i, v := range s[1:] { + absDiff := math.Abs(t[i+1] - v) + if absDiff > norm || 
math.IsNaN(norm) { + norm = absDiff + } + } + return norm +} + +// Sum is +// +// var sum float64 +// for i := range x { +// sum += x[i] +// } +func Sum(x []float64) float64 { + var sum float64 + for _, v := range x { + sum += v + } + return sum +} diff --git a/vendor/gonum.org/v1/gonum/internal/asm/f64/sum_amd64.s b/vendor/gonum.org/v1/gonum/internal/asm/f64/sum_amd64.s new file mode 100644 index 0000000000..dd77cbd053 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/sum_amd64.s @@ -0,0 +1,99 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +#define X_PTR SI +#define IDX AX +#define LEN CX +#define TAIL BX +#define SUM X0 +#define SUM_1 X1 +#define SUM_2 X2 +#define SUM_3 X3 + +// func Sum(x []float64) float64 +TEXT ·Sum(SB), NOSPLIT, $0 + MOVQ x_base+0(FP), X_PTR // X_PTR = &x + MOVQ x_len+8(FP), LEN // LEN = len(x) + XORQ IDX, IDX // i = 0 + PXOR SUM, SUM // p_sum_i = 0 + CMPQ LEN, $0 // if LEN == 0 { return 0 } + JE sum_end + + PXOR SUM_1, SUM_1 + PXOR SUM_2, SUM_2 + PXOR SUM_3, SUM_3 + + MOVQ X_PTR, TAIL // Check memory alignment + ANDQ $15, TAIL // TAIL = &y % 16 + JZ no_trim // if TAIL == 0 { goto no_trim } + + // Align on 16-byte boundary + ADDSD (X_PTR), X0 // X0 += x[0] + INCQ IDX // i++ + DECQ LEN // LEN-- + JZ sum_end // if LEN == 0 { return } + +no_trim: + MOVQ LEN, TAIL + SHRQ $4, LEN // LEN = floor( n / 16 ) + JZ sum_tail8 // if LEN == 0 { goto sum_tail8 } + +sum_loop: // sum 16x wide do { + ADDPD (X_PTR)(IDX*8), SUM // sum_i += x[i:i+2] + ADDPD 16(X_PTR)(IDX*8), SUM_1 + ADDPD 32(X_PTR)(IDX*8), SUM_2 + ADDPD 48(X_PTR)(IDX*8), SUM_3 + ADDPD 64(X_PTR)(IDX*8), SUM + ADDPD 80(X_PTR)(IDX*8), SUM_1 + ADDPD 96(X_PTR)(IDX*8), SUM_2 + ADDPD 112(X_PTR)(IDX*8), SUM_3 + ADDQ $16, IDX // i += 16 + DECQ LEN + JNZ sum_loop // } while --LEN > 0 + +sum_tail8: + TESTQ $8, TAIL + JZ sum_tail4 + + ADDPD (X_PTR)(IDX*8), SUM // sum_i += x[i:i+2] + ADDPD 16(X_PTR)(IDX*8), SUM_1 + ADDPD 32(X_PTR)(IDX*8), SUM_2 + ADDPD 48(X_PTR)(IDX*8), SUM_3 + ADDQ $8, IDX + +sum_tail4: + ADDPD SUM_3, SUM + ADDPD SUM_2, SUM_1 + + TESTQ $4, TAIL + JZ sum_tail2 + + ADDPD (X_PTR)(IDX*8), SUM // sum_i += x[i:i+2] + ADDPD 16(X_PTR)(IDX*8), SUM_1 + ADDQ $4, IDX + +sum_tail2: + ADDPD SUM_1, SUM + + TESTQ $2, TAIL + JZ sum_tail1 + + ADDPD (X_PTR)(IDX*8), SUM // sum_i += x[i:i+2] + ADDQ $2, IDX + +sum_tail1: + HADDPD SUM, SUM // sum_i[0] += sum_i[1] + + TESTQ $1, TAIL + JZ sum_end + + ADDSD (X_PTR)(IDX*8), SUM + +sum_end: // return sum + MOVSD SUM, ret+24(FP) + RET diff --git a/vendor/gonum.org/v1/gonum/internal/cmplx64/abs.go b/vendor/gonum.org/v1/gonum/internal/cmplx64/abs.go new file mode 100644 index 0000000000..ac6eb81c0e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/cmplx64/abs.go @@ -0,0 +1,14 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx64 + +import math "gonum.org/v1/gonum/internal/math32" + +// Abs returns the absolute value (also called the modulus) of x. 
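+// For example, Abs(3+4i) is 5: the modulus is computed as
+// Hypot(real(x), imag(x)), which avoids overflow in the intermediate
+// squares.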
+func Abs(x complex64) float32 { return math.Hypot(real(x), imag(x)) } diff --git a/vendor/gonum.org/v1/gonum/internal/cmplx64/conj.go b/vendor/gonum.org/v1/gonum/internal/cmplx64/conj.go new file mode 100644 index 0000000000..705262f2f9 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/cmplx64/conj.go @@ -0,0 +1,12 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx64 + +// Conj returns the complex conjugate of x. +func Conj(x complex64) complex64 { return complex(real(x), -imag(x)) } diff --git a/vendor/gonum.org/v1/gonum/internal/cmplx64/doc.go b/vendor/gonum.org/v1/gonum/internal/cmplx64/doc.go new file mode 100644 index 0000000000..5424ea099c --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/cmplx64/doc.go @@ -0,0 +1,7 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cmplx64 provides complex64 versions of standard library math/cmplx +// package routines used by gonum/blas. +package cmplx64 // import "gonum.org/v1/gonum/internal/cmplx64" diff --git a/vendor/gonum.org/v1/gonum/internal/cmplx64/isinf.go b/vendor/gonum.org/v1/gonum/internal/cmplx64/isinf.go new file mode 100644 index 0000000000..21d3d180e1 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/cmplx64/isinf.go @@ -0,0 +1,25 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx64 + +import math "gonum.org/v1/gonum/internal/math32" + +// IsInf returns true if either real(x) or imag(x) is an infinity. +func IsInf(x complex64) bool { + if math.IsInf(real(x), 0) || math.IsInf(imag(x), 0) { + return true + } + return false +} + +// Inf returns a complex infinity, complex(+Inf, +Inf). +func Inf() complex64 { + inf := math.Inf(1) + return complex(inf, inf) +} diff --git a/vendor/gonum.org/v1/gonum/internal/cmplx64/isnan.go b/vendor/gonum.org/v1/gonum/internal/cmplx64/isnan.go new file mode 100644 index 0000000000..d6d43dbd1f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/cmplx64/isnan.go @@ -0,0 +1,29 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx64 + +import math "gonum.org/v1/gonum/internal/math32" + +// IsNaN returns true if either real(x) or imag(x) is NaN +// and neither is an infinity. +func IsNaN(x complex64) bool { + switch { + case math.IsInf(real(x), 0) || math.IsInf(imag(x), 0): + return false + case math.IsNaN(real(x)) || math.IsNaN(imag(x)): + return true + } + return false +} + +// NaN returns a complex “not-a-number” value. 
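+// The result reports true from IsNaN and, like any NaN, does not compare
+// equal to itself.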
+func NaN() complex64 { + nan := math.NaN() + return complex(nan, nan) +} diff --git a/vendor/gonum.org/v1/gonum/internal/cmplx64/sqrt.go b/vendor/gonum.org/v1/gonum/internal/cmplx64/sqrt.go new file mode 100644 index 0000000000..439987b4ba --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/cmplx64/sqrt.go @@ -0,0 +1,108 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx64 + +import math "gonum.org/v1/gonum/internal/math32" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex square root +// +// DESCRIPTION: +// +// If z = x + iy, r = |z|, then +// +// 1/2 +// Re w = [ (r + x)/2 ] , +// +// 1/2 +// Im w = [ (r - x)/2 ] . +// +// Cancelation error in r-x or r+x is avoided by using the +// identity 2 Re w Im w = y. +// +// Note that -w is also a square root of z. The root chosen +// is always in the right half plane and Im w has the same sign as y. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 25000 3.2e-17 9.6e-18 +// IEEE -10,+10 1,000,000 2.9e-16 6.1e-17 + +// Sqrt returns the square root of x. +// The result r is chosen so that real(r) ≥ 0 and imag(r) has the same sign as imag(x). +func Sqrt(x complex64) complex64 { + if imag(x) == 0 { + if real(x) == 0 { + return complex(0, 0) + } + if real(x) < 0 { + return complex(0, math.Sqrt(-real(x))) + } + return complex(math.Sqrt(real(x)), 0) + } + if real(x) == 0 { + if imag(x) < 0 { + r := math.Sqrt(-0.5 * imag(x)) + return complex(r, -r) + } + r := math.Sqrt(0.5 * imag(x)) + return complex(r, r) + } + a := real(x) + b := imag(x) + var scale float32 + // Rescale to avoid internal overflow or underflow. 
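+	// Both branches scale by exact powers of two: large inputs are
+	// multiplied by 1/4 and the result restored with scale = 2 (the
+	// square root scales by the square root of the factor), while small
+	// inputs are multiplied by 2**54 and restored with scale = 2**-27.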
+ if math.Abs(a) > 4 || math.Abs(b) > 4 { + a *= 0.25 + b *= 0.25 + scale = 2 + } else { + a *= 1.8014398509481984e16 // 2**54 + b *= 1.8014398509481984e16 + scale = 7.450580596923828125e-9 // 2**-27 + } + r := math.Hypot(a, b) + var t float32 + if a > 0 { + t = math.Sqrt(0.5*r + 0.5*a) + r = scale * math.Abs((0.5*b)/t) + t *= scale + } else { + r = math.Sqrt(0.5*r - 0.5*a) + t = scale * math.Abs((0.5*b)/r) + r *= scale + } + if b < 0 { + return complex(t, -r) + } + return complex(t, r) +} diff --git a/vendor/gonum.org/v1/gonum/internal/math32/doc.go b/vendor/gonum.org/v1/gonum/internal/math32/doc.go new file mode 100644 index 0000000000..68917c64e6 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/math32/doc.go @@ -0,0 +1,7 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package math32 provides float32 versions of standard library math package +// routines used by gonum/blas/native. +package math32 // import "gonum.org/v1/gonum/internal/math32" diff --git a/vendor/gonum.org/v1/gonum/internal/math32/math.go b/vendor/gonum.org/v1/gonum/internal/math32/math.go new file mode 100644 index 0000000000..5e92f3d02e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/math32/math.go @@ -0,0 +1,166 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math32 + +import ( + "math" +) + +const ( + unan = 0x7fc00000 + uinf = 0x7f800000 + uneginf = 0xff800000 + mask = 0x7f8 >> 3 + shift = 32 - 8 - 1 + bias = 127 +) + +// Abs returns the absolute value of x. +// +// Special cases are: +// +// Abs(±Inf) = +Inf +// Abs(NaN) = NaN +func Abs(x float32) float32 { + switch { + case x < 0: + return -x + case x == 0: + return 0 // return correctly abs(-0) + } + return x +} + +// Copysign returns a value with the magnitude +// of x and the sign of y. +func Copysign(x, y float32) float32 { + const sign = 1 << 31 + return math.Float32frombits(math.Float32bits(x)&^sign | math.Float32bits(y)&sign) +} + +// Hypot returns Sqrt(p*p + q*q), taking care to avoid +// unnecessary overflow and underflow. +// +// Special cases are: +// +// Hypot(±Inf, q) = +Inf +// Hypot(p, ±Inf) = +Inf +// Hypot(NaN, q) = NaN +// Hypot(p, NaN) = NaN +func Hypot(p, q float32) float32 { + // special cases + switch { + case IsInf(p, 0) || IsInf(q, 0): + return Inf(1) + case IsNaN(p) || IsNaN(q): + return NaN() + } + if p < 0 { + p = -p + } + if q < 0 { + q = -q + } + if p < q { + p, q = q, p + } + if p == 0 { + return 0 + } + q = q / p + return p * Sqrt(1+q*q) +} + +// Inf returns positive infinity if sign >= 0, negative infinity if sign < 0. +func Inf(sign int) float32 { + var v uint32 + if sign >= 0 { + v = uinf + } else { + v = uneginf + } + return math.Float32frombits(v) +} + +// IsInf reports whether f is an infinity, according to sign. +// If sign > 0, IsInf reports whether f is positive infinity. +// If sign < 0, IsInf reports whether f is negative infinity. +// If sign == 0, IsInf reports whether f is either infinity. +func IsInf(f float32, sign int) bool { + // Test for infinity by comparing against maximum float. 
+ // To avoid the floating-point hardware, could use: + // x := math.Float32bits(f); + // return sign >= 0 && x == uinf || sign <= 0 && x == uneginf; + return sign >= 0 && f > math.MaxFloat32 || sign <= 0 && f < -math.MaxFloat32 +} + +// IsNaN reports whether f is an IEEE 754 “not-a-number” value. +func IsNaN(f float32) (is bool) { + // IEEE 754 says that only NaNs satisfy f != f. + // To avoid the floating-point hardware, could use: + // x := math.Float32bits(f); + // return uint32(x>>shift)&mask == mask && x != uinf && x != uneginf + return f != f +} + +// Max returns the larger of x or y. +// +// Special cases are: +// +// Max(x, +Inf) = Max(+Inf, x) = +Inf +// Max(x, NaN) = Max(NaN, x) = NaN +// Max(+0, ±0) = Max(±0, +0) = +0 +// Max(-0, -0) = -0 +func Max(x, y float32) float32 { + // special cases + switch { + case IsInf(x, 1) || IsInf(y, 1): + return Inf(1) + case IsNaN(x) || IsNaN(y): + return NaN() + case x == 0 && x == y: + if Signbit(x) { + return y + } + return x + } + if x > y { + return x + } + return y +} + +// Min returns the smaller of x or y. +// +// Special cases are: +// +// Min(x, -Inf) = Min(-Inf, x) = -Inf +// Min(x, NaN) = Min(NaN, x) = NaN +// Min(-0, ±0) = Min(±0, -0) = -0 +func Min(x, y float32) float32 { + // special cases + switch { + case IsInf(x, -1) || IsInf(y, -1): + return Inf(-1) + case IsNaN(x) || IsNaN(y): + return NaN() + case x == 0 && x == y: + if Signbit(x) { + return x + } + return y + } + if x < y { + return x + } + return y +} + +// NaN returns an IEEE 754 “not-a-number” value. +func NaN() float32 { return math.Float32frombits(unan) } diff --git a/vendor/gonum.org/v1/gonum/internal/math32/signbit.go b/vendor/gonum.org/v1/gonum/internal/math32/signbit.go new file mode 100644 index 0000000000..3e9f0bb41d --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/math32/signbit.go @@ -0,0 +1,16 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math32 + +import "math" + +// Signbit returns true if x is negative or negative zero. +func Signbit(x float32) bool { + return math.Float32bits(x)&(1<<31) != 0 +} diff --git a/vendor/gonum.org/v1/gonum/internal/math32/sqrt.go b/vendor/gonum.org/v1/gonum/internal/math32/sqrt.go new file mode 100644 index 0000000000..41f4a134df --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/math32/sqrt.go @@ -0,0 +1,26 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build (!amd64 && !arm64) || noasm || gccgo || safe +// +build !amd64,!arm64 noasm gccgo safe + +package math32 + +import ( + "math" +) + +// Sqrt returns the square root of x. +// +// Special cases are: +// +// Sqrt(+Inf) = +Inf +// Sqrt(±0) = ±0 +// Sqrt(x < 0) = NaN +// Sqrt(NaN) = NaN +func Sqrt(x float32) float32 { + // FIXME(kortschak): Direct translation of the math package + // asm code for 386 fails to build. 
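+	// Note: going through float64 is believed safe here, since float64
+	// carries more than twice float32's precision, so rounding the
+	// double-precision square root back to float32 should not
+	// double-round incorrectly.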
+ return float32(math.Sqrt(float64(x))) +} diff --git a/vendor/gonum.org/v1/gonum/internal/math32/sqrt_amd64.go b/vendor/gonum.org/v1/gonum/internal/math32/sqrt_amd64.go new file mode 100644 index 0000000000..eca83f8700 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/math32/sqrt_amd64.go @@ -0,0 +1,22 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !noasm && !gccgo && !safe +// +build !noasm,!gccgo,!safe + +package math32 + +// Sqrt returns the square root of x. +// +// Special cases are: +// +// Sqrt(+Inf) = +Inf +// Sqrt(±0) = ±0 +// Sqrt(x < 0) = NaN +// Sqrt(NaN) = NaN +func Sqrt(x float32) float32 diff --git a/vendor/gonum.org/v1/gonum/internal/math32/sqrt_amd64.s b/vendor/gonum.org/v1/gonum/internal/math32/sqrt_amd64.s new file mode 100644 index 0000000000..1c1432a3ca --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/math32/sqrt_amd64.s @@ -0,0 +1,17 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// func Sqrt(x float32) float32 +TEXT ·Sqrt(SB),NOSPLIT,$0 + SQRTSS x+0(FP), X0 + MOVSS X0, ret+8(FP) + RET diff --git a/vendor/gonum.org/v1/gonum/internal/math32/sqrt_arm64.go b/vendor/gonum.org/v1/gonum/internal/math32/sqrt_arm64.go new file mode 100644 index 0000000000..eca83f8700 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/math32/sqrt_arm64.go @@ -0,0 +1,22 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !noasm && !gccgo && !safe +// +build !noasm,!gccgo,!safe + +package math32 + +// Sqrt returns the square root of x. +// +// Special cases are: +// +// Sqrt(+Inf) = +Inf +// Sqrt(±0) = ±0 +// Sqrt(x < 0) = NaN +// Sqrt(NaN) = NaN +func Sqrt(x float32) float32 diff --git a/vendor/gonum.org/v1/gonum/internal/math32/sqrt_arm64.s b/vendor/gonum.org/v1/gonum/internal/math32/sqrt_arm64.s new file mode 100644 index 0000000000..f18b5521d4 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/internal/math32/sqrt_arm64.s @@ -0,0 +1,18 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2020 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build !noasm,!gccgo,!safe + +#include "textflag.h" + +// func Sqrt(x float32) float32 +TEXT ·Sqrt(SB),NOSPLIT,$0 + FMOVS x+0(FP), F0 + FSQRTS F0, F0 + FMOVS F0, ret+8(FP) + RET diff --git a/vendor/gonum.org/v1/gonum/lapack/.gitignore b/vendor/gonum.org/v1/gonum/lapack/.gitignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/vendor/gonum.org/v1/gonum/lapack/README.md b/vendor/gonum.org/v1/gonum/lapack/README.md new file mode 100644 index 0000000000..ee23148c97 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/README.md @@ -0,0 +1,29 @@ +Gonum LAPACK +====== +[![go.dev reference](https://pkg.go.dev/badge/gonum.org/v1/gonum/lapack)](https://pkg.go.dev/gonum.org/v1/gonum/lapack) +[![GoDoc](https://godocs.io/gonum.org/v1/gonum/lapack?status.svg)](https://godocs.io/gonum.org/v1/gonum/lapack) + +A collection of packages to provide LAPACK functionality for the Go programming +language (http://golang.org). This provides a partial implementation in native go +and a wrapper using cgo to a c-based implementation. + +## Installation + +``` + go get gonum.org/v1/gonum/lapack/... +``` + +## Packages + +### lapack + +Defines the LAPACK API based on http://www.netlib.org/lapack/lapacke.html + +### lapack/gonum + +Go implementation of the LAPACK API (incomplete, implements the `float64` API). + +### lapack/lapack64 + +Wrappers for an implementation of the double (i.e., `float64`) precision real parts of +the LAPACK API. diff --git a/vendor/gonum.org/v1/gonum/lapack/doc.go b/vendor/gonum.org/v1/gonum/lapack/doc.go new file mode 100644 index 0000000000..2475cb4aa0 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package lapack provides interfaces for the LAPACK linear algebra standard. +package lapack // import "gonum.org/v1/gonum/lapack" diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dbdsqr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dbdsqr.go new file mode 100644 index 0000000000..fd421d7ef5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dbdsqr.go @@ -0,0 +1,506 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dbdsqr performs a singular value decomposition of a real n×n bidiagonal matrix. +// +// The SVD of the bidiagonal matrix B is +// +// B = Q * S * Pᵀ +// +// where S is a diagonal matrix of singular values, Q is an orthogonal matrix of +// left singular vectors, and P is an orthogonal matrix of right singular vectors. +// +// Q and P are only computed if requested. If left singular vectors are requested, +// this routine returns U * Q instead of Q, and if right singular vectors are +// requested Pᵀ * VT is returned instead of Pᵀ. +// +// Frequently Dbdsqr is used in conjunction with Dgebrd which reduces a general +// matrix A into bidiagonal form. In this case, the SVD of A is +// +// A = (U * Q) * S * (Pᵀ * VT) +// +// This routine may also compute Qᵀ * C. +// +// d and e contain the elements of the bidiagonal matrix b. d must have length at +// least n, and e must have length at least n-1. Dbdsqr will panic if there is +// insufficient length. 
On exit, D contains the singular values of B in decreasing +// order. +// +// VT is a matrix of size n×ncvt whose elements are stored in vt. The elements +// of vt are modified to contain Pᵀ * VT on exit. VT is not used if ncvt == 0. +// +// U is a matrix of size nru×n whose elements are stored in u. The elements +// of u are modified to contain U * Q on exit. U is not used if nru == 0. +// +// C is a matrix of size n×ncc whose elements are stored in c. The elements +// of c are modified to contain Qᵀ * C on exit. C is not used if ncc == 0. +// +// work contains temporary storage and must have length at least 4*(n-1). Dbdsqr +// will panic if there is insufficient working memory. +// +// Dbdsqr returns whether the decomposition was successful. +// +// Dbdsqr is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dbdsqr(uplo blas.Uplo, n, ncvt, nru, ncc int, d, e, vt []float64, ldvt int, u []float64, ldu int, c []float64, ldc int, work []float64) (ok bool) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case ncvt < 0: + panic(ncvtLT0) + case nru < 0: + panic(nruLT0) + case ncc < 0: + panic(nccLT0) + case ldvt < max(1, ncvt): + panic(badLdVT) + case (ldu < max(1, n) && nru > 0) || (ldu < 1 && nru == 0): + panic(badLdU) + case ldc < max(1, ncc): + panic(badLdC) + } + + // Quick return if possible. + if n == 0 { + return true + } + + if len(vt) < (n-1)*ldvt+ncvt && ncvt != 0 { + panic(shortVT) + } + if len(u) < (nru-1)*ldu+n && nru != 0 { + panic(shortU) + } + if len(c) < (n-1)*ldc+ncc && ncc != 0 { + panic(shortC) + } + if len(d) < n { + panic(shortD) + } + if len(e) < n-1 { + panic(shortE) + } + if len(work) < 4*(n-1) { + panic(shortWork) + } + + var info int + bi := blas64.Implementation() + const maxIter = 6 + + if n != 1 { + // If the singular vectors do not need to be computed, use qd algorithm. + if !(ncvt > 0 || nru > 0 || ncc > 0) { + info = impl.Dlasq1(n, d, e, work) + // If info is 2 dqds didn't finish, and so try to. + if info != 2 { + return info == 0 + } + } + nm1 := n - 1 + nm12 := nm1 + nm1 + nm13 := nm12 + nm1 + idir := 0 + + eps := dlamchE + unfl := dlamchS + lower := uplo == blas.Lower + var cs, sn, r float64 + if lower { + for i := 0; i < n-1; i++ { + cs, sn, r = impl.Dlartg(d[i], e[i]) + d[i] = r + e[i] = sn * d[i+1] + d[i+1] *= cs + work[i] = cs + work[nm1+i] = sn + } + if nru > 0 { + impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, n, work, work[n-1:], u, ldu) + } + if ncc > 0 { + impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, n, ncc, work, work[n-1:], c, ldc) + } + } + // Compute singular values to a relative accuracy of tol. 
If tol is negative + // the values will be computed to an absolute accuracy of math.Abs(tol) * norm(b) + tolmul := math.Max(10, math.Min(100, math.Pow(eps, -1.0/8))) + tol := tolmul * eps + var smax float64 + for i := 0; i < n; i++ { + smax = math.Max(smax, math.Abs(d[i])) + } + for i := 0; i < n-1; i++ { + smax = math.Max(smax, math.Abs(e[i])) + } + + var smin float64 + var thresh float64 + if tol >= 0 { + sminoa := math.Abs(d[0]) + if sminoa != 0 { + mu := sminoa + for i := 1; i < n; i++ { + mu = math.Abs(d[i]) * (mu / (mu + math.Abs(e[i-1]))) + sminoa = math.Min(sminoa, mu) + if sminoa == 0 { + break + } + } + } + sminoa = sminoa / math.Sqrt(float64(n)) + thresh = math.Max(tol*sminoa, float64(maxIter*n*n)*unfl) + } else { + thresh = math.Max(math.Abs(tol)*smax, float64(maxIter*n*n)*unfl) + } + // Prepare for the main iteration loop for the singular values. + maxIt := maxIter * n * n + iter := 0 + oldl2 := -1 + oldm := -1 + // m points to the last element of unconverged part of matrix. + m := n + + Outer: + for m > 1 { + if iter > maxIt { + info = 0 + for i := 0; i < n-1; i++ { + if e[i] != 0 { + info++ + } + } + return info == 0 + } + // Find diagonal block of matrix to work on. + if tol < 0 && math.Abs(d[m-1]) <= thresh { + d[m-1] = 0 + } + smax = math.Abs(d[m-1]) + var l2 int + var broke bool + for l3 := 0; l3 < m-1; l3++ { + l2 = m - l3 - 2 + abss := math.Abs(d[l2]) + abse := math.Abs(e[l2]) + if tol < 0 && abss <= thresh { + d[l2] = 0 + } + if abse <= thresh { + broke = true + break + } + smax = math.Max(math.Max(smax, abss), abse) + } + if broke { + e[l2] = 0 + if l2 == m-2 { + // Convergence of bottom singular value, return to top. + m-- + continue + } + l2++ + } else { + l2 = 0 + } + // e[ll] through e[m-2] are nonzero, e[ll-1] is zero + if l2 == m-2 { + // Handle 2×2 block separately. + var sinr, cosr, sinl, cosl float64 + d[m-1], d[m-2], sinr, cosr, sinl, cosl = impl.Dlasv2(d[m-2], e[m-2], d[m-1]) + e[m-2] = 0 + if ncvt > 0 { + bi.Drot(ncvt, vt[(m-2)*ldvt:], 1, vt[(m-1)*ldvt:], 1, cosr, sinr) + } + if nru > 0 { + bi.Drot(nru, u[m-2:], ldu, u[m-1:], ldu, cosl, sinl) + } + if ncc > 0 { + bi.Drot(ncc, c[(m-2)*ldc:], 1, c[(m-1)*ldc:], 1, cosl, sinl) + } + m -= 2 + continue + } + // If working on a new submatrix, choose shift direction from larger end + // diagonal element toward smaller. + if l2 > oldm-1 || m-1 < oldl2 { + if math.Abs(d[l2]) >= math.Abs(d[m-1]) { + idir = 1 + } else { + idir = 2 + } + } + // Apply convergence tests. + // TODO(btracey): There is a lot of similar looking code here. See + // if there is a better way to de-duplicate. + if idir == 1 { + // Run convergence test in forward direction. + // First apply standard test to bottom of matrix. + if math.Abs(e[m-2]) <= math.Abs(tol)*math.Abs(d[m-1]) || (tol < 0 && math.Abs(e[m-2]) <= thresh) { + e[m-2] = 0 + continue + } + if tol >= 0 { + // If relative accuracy desired, apply convergence criterion forward. + mu := math.Abs(d[l2]) + smin = mu + for l3 := l2; l3 < m-1; l3++ { + if math.Abs(e[l3]) <= tol*mu { + e[l3] = 0 + continue Outer + } + mu = math.Abs(d[l3+1]) * (mu / (mu + math.Abs(e[l3]))) + smin = math.Min(smin, mu) + } + } + } else { + // Run convergence test in backward direction. + // First apply standard test to top of matrix. + if math.Abs(e[l2]) <= math.Abs(tol)*math.Abs(d[l2]) || (tol < 0 && math.Abs(e[l2]) <= thresh) { + e[l2] = 0 + continue + } + if tol >= 0 { + // If relative accuracy desired, apply convergence criterion backward. 
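+ // mu tracks a running estimate of the smallest singular value of
+ // the trailing block; e[l3] is set to zero as soon as it is
+ // negligible relative to tol*mu, keeping the test scale-free.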
+ mu := math.Abs(d[m-1]) + smin = mu + for l3 := m - 2; l3 >= l2; l3-- { + if math.Abs(e[l3]) <= tol*mu { + e[l3] = 0 + continue Outer + } + mu = math.Abs(d[l3]) * (mu / (mu + math.Abs(e[l3]))) + smin = math.Min(smin, mu) + } + } + } + oldl2 = l2 + oldm = m + // Compute shift. First, test if shifting would ruin relative accuracy, + // and if so set the shift to zero. + var shift float64 + if tol >= 0 && float64(n)*tol*(smin/smax) <= math.Max(eps, (1.0/100)*tol) { + shift = 0 + } else { + var sl2 float64 + if idir == 1 { + sl2 = math.Abs(d[l2]) + shift, _ = impl.Dlas2(d[m-2], e[m-2], d[m-1]) + } else { + sl2 = math.Abs(d[m-1]) + shift, _ = impl.Dlas2(d[l2], e[l2], d[l2+1]) + } + // Test if shift is negligible + if sl2 > 0 { + if (shift/sl2)*(shift/sl2) < eps { + shift = 0 + } + } + } + iter += m - l2 + 1 + // If no shift, do simplified QR iteration. + if shift == 0 { + if idir == 1 { + cs := 1.0 + oldcs := 1.0 + var sn, r, oldsn float64 + for i := l2; i < m-1; i++ { + cs, sn, r = impl.Dlartg(d[i]*cs, e[i]) + if i > l2 { + e[i-1] = oldsn * r + } + oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i+1]*sn) + work[i-l2] = cs + work[i-l2+nm1] = sn + work[i-l2+nm12] = oldcs + work[i-l2+nm13] = oldsn + } + h := d[m-1] * cs + d[m-1] = h * oldcs + e[m-2] = h * oldsn + if ncvt > 0 { + impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt) + } + if nru > 0 { + impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu) + } + if ncc > 0 { + impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc) + } + if math.Abs(e[m-2]) < thresh { + e[m-2] = 0 + } + } else { + cs := 1.0 + oldcs := 1.0 + var sn, r, oldsn float64 + for i := m - 1; i >= l2+1; i-- { + cs, sn, r = impl.Dlartg(d[i]*cs, e[i-1]) + if i < m-1 { + e[i] = oldsn * r + } + oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i-1]*sn) + work[i-l2-1] = cs + work[i-l2+nm1-1] = -sn + work[i-l2+nm12-1] = oldcs + work[i-l2+nm13-1] = -oldsn + } + h := d[l2] * cs + d[l2] = h * oldcs + e[l2] = h * oldsn + if ncvt > 0 { + impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt) + } + if nru > 0 { + impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu) + } + if ncc > 0 { + impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc) + } + if math.Abs(e[l2]) <= thresh { + e[l2] = 0 + } + } + } else { + // Use nonzero shift. + if idir == 1 { + // Chase bulge from top to bottom. Save cosines and sines for + // later singular vector updates. 
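+ // f and g seed the first Givens rotation of the implicit-shift
+ // QR step; rotating g into f creates the bulge that the loop
+ // below chases down the band.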
+ f := (math.Abs(d[l2]) - shift) * (math.Copysign(1, d[l2]) + shift/d[l2]) + g := e[l2] + var cosl, sinl float64 + for i := l2; i < m-1; i++ { + cosr, sinr, r := impl.Dlartg(f, g) + if i > l2 { + e[i-1] = r + } + f = cosr*d[i] + sinr*e[i] + e[i] = cosr*e[i] - sinr*d[i] + g = sinr * d[i+1] + d[i+1] *= cosr + cosl, sinl, r = impl.Dlartg(f, g) + d[i] = r + f = cosl*e[i] + sinl*d[i+1] + d[i+1] = cosl*d[i+1] - sinl*e[i] + if i < m-2 { + g = sinl * e[i+1] + e[i+1] = cosl * e[i+1] + } + work[i-l2] = cosr + work[i-l2+nm1] = sinr + work[i-l2+nm12] = cosl + work[i-l2+nm13] = sinl + } + e[m-2] = f + if ncvt > 0 { + impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt) + } + if nru > 0 { + impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu) + } + if ncc > 0 { + impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc) + } + if math.Abs(e[m-2]) <= thresh { + e[m-2] = 0 + } + } else { + // Chase bulge from top to bottom. Save cosines and sines for + // later singular vector updates. + f := (math.Abs(d[m-1]) - shift) * (math.Copysign(1, d[m-1]) + shift/d[m-1]) + g := e[m-2] + for i := m - 1; i > l2; i-- { + cosr, sinr, r := impl.Dlartg(f, g) + if i < m-1 { + e[i] = r + } + f = cosr*d[i] + sinr*e[i-1] + e[i-1] = cosr*e[i-1] - sinr*d[i] + g = sinr * d[i-1] + d[i-1] *= cosr + cosl, sinl, r := impl.Dlartg(f, g) + d[i] = r + f = cosl*e[i-1] + sinl*d[i-1] + d[i-1] = cosl*d[i-1] - sinl*e[i-1] + if i > l2+1 { + g = sinl * e[i-2] + e[i-2] *= cosl + } + work[i-l2-1] = cosr + work[i-l2+nm1-1] = -sinr + work[i-l2+nm12-1] = cosl + work[i-l2+nm13-1] = -sinl + } + e[l2] = f + if math.Abs(e[l2]) <= thresh { + e[l2] = 0 + } + if ncvt > 0 { + impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt) + } + if nru > 0 { + impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu) + } + if ncc > 0 { + impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc) + } + } + } + } + } + + // All singular values converged, make them positive. + for i := 0; i < n; i++ { + if d[i] < 0 { + d[i] *= -1 + if ncvt > 0 { + bi.Dscal(ncvt, -1, vt[i*ldvt:], 1) + } + } + } + + // Sort the singular values in decreasing order. + for i := 0; i < n-1; i++ { + isub := 0 + smin := d[0] + for j := 1; j < n-i; j++ { + if d[j] <= smin { + isub = j + smin = d[j] + } + } + if isub != n-i { + // Swap singular values and vectors. + d[isub] = d[n-i-1] + d[n-i-1] = smin + if ncvt > 0 { + bi.Dswap(ncvt, vt[isub*ldvt:], 1, vt[(n-i-1)*ldvt:], 1) + } + if nru > 0 { + bi.Dswap(nru, u[isub:], ldu, u[n-i-1:], ldu) + } + if ncc > 0 { + bi.Dswap(ncc, c[isub*ldc:], 1, c[(n-i-1)*ldc:], 1) + } + } + } + info = 0 + for i := 0; i < n-1; i++ { + if e[i] != 0 { + info++ + } + } + return info == 0 +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgebak.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebak.go new file mode 100644 index 0000000000..b6af972e6a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebak.go @@ -0,0 +1,91 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
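Dbdsqr's argument list is long, but the singular-values-only case collapses most of it. A minimal sketch, assuming the exported Implementation of lapack/gonum and illustrative data; with ncvt = nru = ncc = 0 the vt, u and c slices may be nil and their leading dimensions only need to be at least 1:

	package main

	import (
		"fmt"

		"gonum.org/v1/gonum/blas"
		lapackgonum "gonum.org/v1/gonum/lapack/gonum"
	)

	func main() {
		impl := lapackgonum.Implementation{}
		n := 3
		d := []float64{4, 3, 2} // diagonal of B
		e := []float64{1, 1}    // superdiagonal, uplo == blas.Upper
		// Documented minimum is 4*(n-1); a little extra does no harm.
		work := make([]float64, 4*n)
		ok := impl.Dbdsqr(blas.Upper, n, 0, 0, 0, d, e, nil, 1, nil, 1, nil, 1, work)
		fmt.Println(ok, d) // on success d holds the singular values, decreasing
	}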
+ +package gonum + +import ( + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dgebak updates an n×m matrix V as +// +// V = P D V if side == lapack.EVRight, +// V = P D^{-1} V if side == lapack.EVLeft, +// +// where P and D are n×n permutation and scaling matrices, respectively, +// implicitly represented by job, scale, ilo and ihi as returned by Dgebal. +// +// Typically, columns of the matrix V contain the right or left (determined by +// side) eigenvectors of the balanced matrix output by Dgebal, and Dgebak forms +// the eigenvectors of the original matrix. +// +// Dgebak is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgebak(job lapack.BalanceJob, side lapack.EVSide, n, ilo, ihi int, scale []float64, m int, v []float64, ldv int) { + switch { + case job != lapack.BalanceNone && job != lapack.Permute && job != lapack.Scale && job != lapack.PermuteScale: + panic(badBalanceJob) + case side != lapack.EVLeft && side != lapack.EVRight: + panic(badEVSide) + case n < 0: + panic(nLT0) + case ilo < 0 || max(0, n-1) < ilo: + panic(badIlo) + case ihi < min(ilo, n-1) || n <= ihi: + panic(badIhi) + case m < 0: + panic(mLT0) + case ldv < max(1, m): + panic(badLdV) + } + + // Quick return if possible. + if n == 0 || m == 0 { + return + } + + if len(scale) < n { + panic(shortScale) + } + if len(v) < (n-1)*ldv+m { + panic(shortV) + } + + // Quick return if possible. + if job == lapack.BalanceNone { + return + } + + bi := blas64.Implementation() + if ilo != ihi && job != lapack.Permute { + // Backward balance. + if side == lapack.EVRight { + for i := ilo; i <= ihi; i++ { + bi.Dscal(m, scale[i], v[i*ldv:], 1) + } + } else { + for i := ilo; i <= ihi; i++ { + bi.Dscal(m, 1/scale[i], v[i*ldv:], 1) + } + } + } + if job == lapack.Scale { + return + } + // Backward permutation. + for i := ilo - 1; i >= 0; i-- { + k := int(scale[i]) + if k == i { + continue + } + bi.Dswap(m, v[i*ldv:], 1, v[k*ldv:], 1) + } + for i := ihi + 1; i < n; i++ { + k := int(scale[i]) + if k == i { + continue + } + bi.Dswap(m, v[i*ldv:], 1, v[k*ldv:], 1) + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgebal.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebal.go new file mode 100644 index 0000000000..7623e2faee --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebal.go @@ -0,0 +1,248 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dgebal balances an n×n matrix A. Balancing consists of two stages, permuting +// and scaling. Both steps are optional and depend on the value of job. +// +// Permuting consists of applying a permutation matrix P such that the matrix +// that results from Pᵀ*A*P takes the upper block triangular form +// +// [ T1 X Y ] +// Pᵀ A P = [ 0 B Z ], +// [ 0 0 T2 ] +// +// where T1 and T2 are upper triangular matrices and B contains at least one +// nonzero off-diagonal element in each row and column. The indices ilo and ihi +// mark the starting and ending columns of the submatrix B. The eigenvalues of A +// isolated in the first 0 to ilo-1 and last ihi+1 to n-1 elements on the +// diagonal can be read off without any roundoff error. +// +// Scaling consists of applying a diagonal similarity transformation D such that +// D^{-1}*B*D has the 1-norm of each row and its corresponding column nearly +// equal. 
The output matrix is +// +// [ T1 X*D Y ] +// [ 0 inv(D)*B*D inv(D)*Z ]. +// [ 0 0 T2 ] +// +// Scaling may reduce the 1-norm of the matrix, and improve the accuracy of +// the computed eigenvalues and/or eigenvectors. +// +// job specifies the operations that will be performed on A. +// If job is lapack.BalanceNone, Dgebal sets scale[i] = 1 for all i and returns ilo=0, ihi=n-1. +// If job is lapack.Permute, only permuting will be done. +// If job is lapack.Scale, only scaling will be done. +// If job is lapack.PermuteScale, both permuting and scaling will be done. +// +// On return, if job is lapack.Permute or lapack.PermuteScale, it will hold that +// +// A[i,j] == 0, for i > j and j ∈ {0, ..., ilo-1, ihi+1, ..., n-1}. +// +// If job is lapack.BalanceNone or lapack.Scale, or if n == 0, it will hold that +// +// ilo == 0 and ihi == n-1. +// +// On return, scale will contain information about the permutations and scaling +// factors applied to A. If π(j) denotes the index of the column interchanged +// with column j, and D[j,j] denotes the scaling factor applied to column j, +// then +// +// scale[j] == π(j), for j ∈ {0, ..., ilo-1, ihi+1, ..., n-1}, +// == D[j,j], for j ∈ {ilo, ..., ihi}. +// +// scale must have length equal to n, otherwise Dgebal will panic. +// +// Dgebal is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgebal(job lapack.BalanceJob, n int, a []float64, lda int, scale []float64) (ilo, ihi int) { + switch { + case job != lapack.BalanceNone && job != lapack.Permute && job != lapack.Scale && job != lapack.PermuteScale: + panic(badBalanceJob) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + ilo = 0 + ihi = n - 1 + + if n == 0 { + return ilo, ihi + } + + if len(scale) != n { + panic(shortScale) + } + + if job == lapack.BalanceNone { + for i := range scale { + scale[i] = 1 + } + return ilo, ihi + } + + if len(a) < (n-1)*lda+n { + panic(shortA) + } + + bi := blas64.Implementation() + swapped := true + + if job == lapack.Scale { + goto scaling + } + + // Permutation to isolate eigenvalues if possible. + // + // Search for rows isolating an eigenvalue and push them down. + for swapped { + swapped = false + rows: + for i := ihi; i >= 0; i-- { + for j := 0; j <= ihi; j++ { + if i == j { + continue + } + if a[i*lda+j] != 0 { + continue rows + } + } + // Row i has only zero off-diagonal elements in the + // block A[ilo:ihi+1,ilo:ihi+1]. + scale[ihi] = float64(i) + if i != ihi { + bi.Dswap(ihi+1, a[i:], lda, a[ihi:], lda) + bi.Dswap(n, a[i*lda:], 1, a[ihi*lda:], 1) + } + if ihi == 0 { + scale[0] = 1 + return ilo, ihi + } + ihi-- + swapped = true + break + } + } + // Search for columns isolating an eigenvalue and push them left. + swapped = true + for swapped { + swapped = false + columns: + for j := ilo; j <= ihi; j++ { + for i := ilo; i <= ihi; i++ { + if i == j { + continue + } + if a[i*lda+j] != 0 { + continue columns + } + } + // Column j has only zero off-diagonal elements in the + // block A[ilo:ihi+1,ilo:ihi+1]. + scale[ilo] = float64(j) + if j != ilo { + bi.Dswap(ihi+1, a[j:], lda, a[ilo:], lda) + bi.Dswap(n-ilo, a[j*lda+ilo:], 1, a[ilo*lda+ilo:], 1) + } + swapped = true + ilo++ + break + } + } + +scaling: + for i := ilo; i <= ihi; i++ { + scale[i] = 1 + } + + if job == lapack.Permute { + return ilo, ihi + } + + // Balance the submatrix in rows ilo to ihi. + + const ( + // sclfac should be a power of 2 to avoid roundoff errors. 
+ // Elements of scale are restricted to powers of sclfac, + // therefore the matrix will be only nearly balanced. + sclfac = 2 + // factor determines the minimum reduction of the row and column + // norms that is considered non-negligible. It must be less than 1. + factor = 0.95 + ) + sfmin1 := dlamchS / dlamchP + sfmax1 := 1 / sfmin1 + sfmin2 := sfmin1 * sclfac + sfmax2 := 1 / sfmin2 + + // Iterative loop for norm reduction. + var conv bool + for !conv { + conv = true + for i := ilo; i <= ihi; i++ { + c := bi.Dnrm2(ihi-ilo+1, a[ilo*lda+i:], lda) + r := bi.Dnrm2(ihi-ilo+1, a[i*lda+ilo:], 1) + ica := bi.Idamax(ihi+1, a[i:], lda) + ca := math.Abs(a[ica*lda+i]) + ira := bi.Idamax(n-ilo, a[i*lda+ilo:], 1) + ra := math.Abs(a[i*lda+ilo+ira]) + + // Guard against zero c or r due to underflow. + if c == 0 || r == 0 { + continue + } + g := r / sclfac + f := 1.0 + s := c + r + for c < g && math.Max(f, math.Max(c, ca)) < sfmax2 && math.Min(r, math.Min(g, ra)) > sfmin2 { + if math.IsNaN(c + f + ca + r + g + ra) { + // Panic if NaN to avoid infinite loop. + panic("lapack: NaN") + } + f *= sclfac + c *= sclfac + ca *= sclfac + g /= sclfac + r /= sclfac + ra /= sclfac + } + g = c / sclfac + for r <= g && math.Max(r, ra) < sfmax2 && math.Min(math.Min(f, c), math.Min(g, ca)) > sfmin2 { + f /= sclfac + c /= sclfac + ca /= sclfac + g /= sclfac + r *= sclfac + ra *= sclfac + } + + if c+r >= factor*s { + // Reduction would be negligible. + continue + } + if f < 1 && scale[i] < 1 && f*scale[i] <= sfmin1 { + continue + } + if f > 1 && scale[i] > 1 && scale[i] >= sfmax1/f { + continue + } + + // Now balance. + scale[i] *= f + bi.Dscal(n-ilo, 1/f, a[i*lda+ilo:], 1) + bi.Dscal(ihi+1, f, a[i:], lda) + conv = false + } + } + return ilo, ihi +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgebd2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebd2.go new file mode 100644 index 0000000000..4f323ec500 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebd2.go @@ -0,0 +1,88 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dgebd2 reduces an m×n matrix A to upper or lower bidiagonal form by an orthogonal +// transformation. +// +// Qᵀ * A * P = B +// +// if m >= n, B is upper diagonal, otherwise B is lower bidiagonal. +// d is the diagonal, len = min(m,n) +// e is the off-diagonal len = min(m,n)-1 +// +// Dgebd2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgebd2(m, n int, a []float64, lda int, d, e, tauQ, tauP, work []float64) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. + minmn := min(m, n) + if minmn == 0 { + return + } + + switch { + case len(d) < minmn: + panic(shortD) + case len(e) < minmn-1: + panic(shortE) + case len(tauQ) < minmn: + panic(shortTauQ) + case len(tauP) < minmn: + panic(shortTauP) + case len(work) < max(m, n): + panic(shortWork) + } + + if m >= n { + for i := 0; i < n; i++ { + a[i*lda+i], tauQ[i] = impl.Dlarfg(m-i, a[i*lda+i], a[min(i+1, m-1)*lda+i:], lda) + d[i] = a[i*lda+i] + a[i*lda+i] = 1 + // Apply H_i to A[i:m, i+1:n] from the left. 
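+ // At this point the diagonal entry holds 1 so that Dlarf sees the
+ // reflector's implicit unit first element; the true value was saved
+ // in d[i] and is restored immediately after the application.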
+ if i < n-1 { + impl.Dlarf(blas.Left, m-i, n-i-1, a[i*lda+i:], lda, tauQ[i], a[i*lda+i+1:], lda, work) + } + a[i*lda+i] = d[i] + if i < n-1 { + a[i*lda+i+1], tauP[i] = impl.Dlarfg(n-i-1, a[i*lda+i+1], a[i*lda+min(i+2, n-1):], 1) + e[i] = a[i*lda+i+1] + a[i*lda+i+1] = 1 + impl.Dlarf(blas.Right, m-i-1, n-i-1, a[i*lda+i+1:], 1, tauP[i], a[(i+1)*lda+i+1:], lda, work) + a[i*lda+i+1] = e[i] + } else { + tauP[i] = 0 + } + } + return + } + for i := 0; i < m; i++ { + a[i*lda+i], tauP[i] = impl.Dlarfg(n-i, a[i*lda+i], a[i*lda+min(i+1, n-1):], 1) + d[i] = a[i*lda+i] + a[i*lda+i] = 1 + if i < m-1 { + impl.Dlarf(blas.Right, m-i-1, n-i, a[i*lda+i:], 1, tauP[i], a[(i+1)*lda+i:], lda, work) + } + a[i*lda+i] = d[i] + if i < m-1 { + a[(i+1)*lda+i], tauQ[i] = impl.Dlarfg(m-i-1, a[(i+1)*lda+i], a[min(i+2, m-1)*lda+i:], lda) + e[i] = a[(i+1)*lda+i] + a[(i+1)*lda+i] = 1 + impl.Dlarf(blas.Left, m-i-1, n-i-1, a[(i+1)*lda+i:], lda, tauQ[i], a[(i+1)*lda+i+1:], lda, work) + a[(i+1)*lda+i] = e[i] + } else { + tauQ[i] = 0 + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgebrd.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebrd.go new file mode 100644 index 0000000000..6b6654ba6b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebrd.go @@ -0,0 +1,169 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dgebrd reduces a general m×n matrix A to upper or lower bidiagonal form B by +// an orthogonal transformation: +// +// Qᵀ * A * P = B. +// +// The diagonal elements of B are stored in d and the off-diagonal elements are stored +// in e. These are additionally stored along the diagonal of A and the off-diagonal +// of A. If m >= n B is an upper-bidiagonal matrix, and if m < n B is a +// lower-bidiagonal matrix. +// +// The remaining elements of A store the data needed to construct Q and P. +// The matrices Q and P are products of elementary reflectors +// +// if m >= n, Q = H_0 * H_1 * ... * H_{n-1}, +// P = G_0 * G_1 * ... * G_{n-2}, +// if m < n, Q = H_0 * H_1 * ... * H_{m-2}, +// P = G_0 * G_1 * ... * G_{m-1}, +// +// where +// +// H_i = I - tauQ[i] * v_i * v_iᵀ, +// G_i = I - tauP[i] * u_i * u_iᵀ. +// +// As an example, on exit the entries of A when m = 6, and n = 5 +// +// [ d e u1 u1 u1] +// [v1 d e u2 u2] +// [v1 v2 d e u3] +// [v1 v2 v3 d e] +// [v1 v2 v3 v4 d] +// [v1 v2 v3 v4 v5] +// +// and when m = 5, n = 6 +// +// [ d u1 u1 u1 u1 u1] +// [ e d u2 u2 u2 u2] +// [v1 e d u3 u3 u3] +// [v1 v2 e d u4 u4] +// [v1 v2 v3 e d u5] +// +// d, tauQ, and tauP must all have length at least min(m,n), and e must have +// length min(m,n) - 1, unless lwork is -1 when there is no check except for +// work which must have a length of at least one. +// +// work is temporary storage, and lwork specifies the usable memory length. +// At minimum, lwork >= max(1,m,n) or be -1 and this function will panic otherwise. +// Dgebrd is blocked decomposition, but the block size is limited +// by the temporary space available. If lwork == -1, instead of performing Dgebrd, +// the optimal work length will be stored into work[0]. +// +// Dgebrd is an internal routine. It is exported for testing purposes. 
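The two-call workspace idiom described above (query with lwork == -1, then the real call) looks like this in practice. A sketch with illustrative sizes, assuming the exported Implementation of lapack/gonum:

	package main

	import (
		"fmt"

		lapackgonum "gonum.org/v1/gonum/lapack/gonum"
	)

	func main() {
		impl := lapackgonum.Implementation{}
		m, n := 6, 5
		a := make([]float64, m*n) // row-major with lda = n; fill with data
		d := make([]float64, 5)   // min(m,n)
		e := make([]float64, 4)   // min(m,n)-1
		tauQ := make([]float64, 5)
		tauP := make([]float64, 5)

		// Query: no reduction is performed, only the optimal
		// workspace length is stored in work[0].
		query := make([]float64, 1)
		impl.Dgebrd(m, n, a, n, d, e, tauQ, tauP, query, -1)

		work := make([]float64, int(query[0]))
		impl.Dgebrd(m, n, a, n, d, e, tauQ, tauP, work, len(work))
		fmt.Println(d, e) // diagonal and off-diagonal of B
	}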
+func (impl Implementation) Dgebrd(m, n int, a []float64, lda int, d, e, tauQ, tauP, work []float64, lwork int) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case lwork < max(1, max(m, n)) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. + minmn := min(m, n) + if minmn == 0 { + work[0] = 1 + return + } + + nb := impl.Ilaenv(1, "DGEBRD", " ", m, n, -1, -1) + lwkopt := (m + n) * nb + if lwork == -1 { + work[0] = float64(lwkopt) + return + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(d) < minmn: + panic(shortD) + case len(e) < minmn-1: + panic(shortE) + case len(tauQ) < minmn: + panic(shortTauQ) + case len(tauP) < minmn: + panic(shortTauP) + } + + nx := minmn + ws := max(m, n) + if 1 < nb && nb < minmn { + // At least one blocked operation can be done. + // Get the crossover point nx. + nx = max(nb, impl.Ilaenv(3, "DGEBRD", " ", m, n, -1, -1)) + // Determine when to switch from blocked to unblocked code. + if nx < minmn { + // At least one blocked operation will be done. + ws = (m + n) * nb + if lwork < ws { + // Not enough work space for the optimal nb, + // consider using a smaller block size. + nbmin := impl.Ilaenv(2, "DGEBRD", " ", m, n, -1, -1) + if lwork >= (m+n)*nbmin { + // Enough work space for minimum block size. + nb = lwork / (m + n) + } else { + nb = minmn + nx = minmn + } + } + } + } + bi := blas64.Implementation() + ldworkx := nb + ldworky := nb + var i int + for i = 0; i < minmn-nx; i += nb { + // Reduce rows and columns i:i+nb to bidiagonal form and return + // the matrices X and Y which are needed to update the unreduced + // part of the matrix. + // X is stored in the first m rows of work, y in the next rows. + x := work[:m*ldworkx] + y := work[m*ldworkx:] + impl.Dlabrd(m-i, n-i, nb, a[i*lda+i:], lda, + d[i:], e[i:], tauQ[i:], tauP[i:], + x, ldworkx, y, ldworky) + + // Update the trailing submatrix A[i+nb:m,i+nb:n], using an update + // of the form A := A - V*Y**T - X*U**T + bi.Dgemm(blas.NoTrans, blas.Trans, m-i-nb, n-i-nb, nb, + -1, a[(i+nb)*lda+i:], lda, y[nb*ldworky:], ldworky, + 1, a[(i+nb)*lda+i+nb:], lda) + + bi.Dgemm(blas.NoTrans, blas.NoTrans, m-i-nb, n-i-nb, nb, + -1, x[nb*ldworkx:], ldworkx, a[i*lda+i+nb:], lda, + 1, a[(i+nb)*lda+i+nb:], lda) + + // Copy diagonal and off-diagonal elements of B back into A. + if m >= n { + for j := i; j < i+nb; j++ { + a[j*lda+j] = d[j] + a[j*lda+j+1] = e[j] + } + } else { + for j := i; j < i+nb; j++ { + a[j*lda+j] = d[j] + a[(j+1)*lda+j] = e[j] + } + } + } + // Use unblocked code to reduce the remainder of the matrix. + impl.Dgebd2(m-i, n-i, a[i*lda+i:], lda, d[i:], e[i:], tauQ[i:], tauP[i:], work) + work[0] = float64(ws) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgecon.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgecon.go new file mode 100644 index 0000000000..1d04644142 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgecon.go @@ -0,0 +1,106 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dgecon estimates and returns the reciprocal of the condition number of the +// n×n matrix A, in either the 1-norm or the ∞-norm, using the LU factorization +// computed by Dgetrf. 
+// +// An estimate is obtained for norm(A⁻¹), and the reciprocal of the condition +// number rcond is computed as +// +// rcond 1 / ( norm(A) * norm(A⁻¹) ). +// +// If n is zero, rcond is always 1. +// +// anorm is the 1-norm or the ∞-norm of the original matrix A. anorm must be +// non-negative, otherwise Dgecon will panic. If anorm is 0 or infinity, Dgecon +// returns 0. If anorm is NaN, Dgecon returns NaN. +// +// work must have length at least 4*n and iwork must have length at least n, +// otherwise Dgecon will panic. +func (impl Implementation) Dgecon(norm lapack.MatrixNorm, n int, a []float64, lda int, anorm float64, work []float64, iwork []int) float64 { + switch { + case norm != lapack.MaxColumnSum && norm != lapack.MaxRowSum: + panic(badNorm) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case anorm < 0: + panic(negANorm) + } + + // Quick return if possible. + if n == 0 { + return 1 + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(work) < 4*n: + panic(shortWork) + case len(iwork) < n: + panic(shortIWork) + } + + // Quick return if possible. + switch { + case anorm == 0: + return 0 + case math.IsNaN(anorm): + // Propagate NaN. + return anorm + case math.IsInf(anorm, 1): + return 0 + } + + bi := blas64.Implementation() + var rcond, ainvnm float64 + var kase int + var normin bool + isave := new([3]int) + onenrm := norm == lapack.MaxColumnSum + smlnum := dlamchS + kase1 := 2 + if onenrm { + kase1 = 1 + } + for { + ainvnm, kase = impl.Dlacn2(n, work[n:], work, iwork, ainvnm, kase, isave) + if kase == 0 { + if ainvnm != 0 { + rcond = (1 / ainvnm) / anorm + } + return rcond + } + var sl, su float64 + if kase == kase1 { + sl = impl.Dlatrs(blas.Lower, blas.NoTrans, blas.Unit, normin, n, a, lda, work, work[2*n:]) + su = impl.Dlatrs(blas.Upper, blas.NoTrans, blas.NonUnit, normin, n, a, lda, work, work[3*n:]) + } else { + su = impl.Dlatrs(blas.Upper, blas.Trans, blas.NonUnit, normin, n, a, lda, work, work[3*n:]) + sl = impl.Dlatrs(blas.Lower, blas.Trans, blas.Unit, normin, n, a, lda, work, work[2*n:]) + } + scale := sl * su + normin = true + if scale != 1 { + ix := bi.Idamax(n, work, 1) + if scale == 0 || scale < math.Abs(work[ix])*smlnum { + return rcond + } + impl.Drscl(n, scale, work, 1) + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgeev.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeev.go new file mode 100644 index 0000000000..b49b66fc65 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeev.go @@ -0,0 +1,287 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dgeev computes the eigenvalues and, optionally, the left and/or right +// eigenvectors for an n×n real nonsymmetric matrix A. +// +// The right eigenvector v_j of A corresponding to an eigenvalue λ_j +// is defined by +// +// A v_j = λ_j v_j, +// +// and the left eigenvector u_j corresponding to an eigenvalue λ_j is defined by +// +// u_jᴴ A = λ_j u_jᴴ, +// +// where u_jᴴ is the conjugate transpose of u_j. +// +// On return, A will be overwritten and the left and right eigenvectors will be +// stored, respectively, in the columns of the n×n matrices VL and VR in the +// same order as their eigenvalues. 
If the j-th eigenvalue is real, then +// +// u_j = VL[:,j], +// v_j = VR[:,j], +// +// and if it is not real, then j and j+1 form a complex conjugate pair and the +// eigenvectors can be recovered as +// +// u_j = VL[:,j] + i*VL[:,j+1], +// u_{j+1} = VL[:,j] - i*VL[:,j+1], +// v_j = VR[:,j] + i*VR[:,j+1], +// v_{j+1} = VR[:,j] - i*VR[:,j+1], +// +// where i is the imaginary unit. The computed eigenvectors are normalized to +// have Euclidean norm equal to 1 and largest component real. +// +// Left eigenvectors will be computed only if jobvl == lapack.LeftEVCompute, +// otherwise jobvl must be lapack.LeftEVNone. +// Right eigenvectors will be computed only if jobvr == lapack.RightEVCompute, +// otherwise jobvr must be lapack.RightEVNone. +// For other values of jobvl and jobvr Dgeev will panic. +// +// wr and wi contain the real and imaginary parts, respectively, of the computed +// eigenvalues. Complex conjugate pairs of eigenvalues appear consecutively with +// the eigenvalue having the positive imaginary part first. +// wr and wi must have length n, and Dgeev will panic otherwise. +// +// work must have length at least lwork and lwork must be at least max(1,4*n) if +// the left or right eigenvectors are computed, and at least max(1,3*n) if no +// eigenvectors are computed. For good performance, lwork must generally be +// larger. On return, optimal value of lwork will be stored in work[0]. +// +// If lwork == -1, instead of performing Dgeev, the function only calculates the +// optimal value of lwork and stores it into work[0]. +// +// On return, first is the index of the first valid eigenvalue. If first == 0, +// all eigenvalues and eigenvectors have been computed. If first is positive, +// Dgeev failed to compute all the eigenvalues, no eigenvectors have been +// computed and wr[first:] and wi[first:] contain those eigenvalues which have +// converged. +func (impl Implementation) Dgeev(jobvl lapack.LeftEVJob, jobvr lapack.RightEVJob, n int, a []float64, lda int, wr, wi []float64, vl []float64, ldvl int, vr []float64, ldvr int, work []float64, lwork int) (first int) { + wantvl := jobvl == lapack.LeftEVCompute + wantvr := jobvr == lapack.RightEVCompute + var minwrk int + if wantvl || wantvr { + minwrk = max(1, 4*n) + } else { + minwrk = max(1, 3*n) + } + switch { + case jobvl != lapack.LeftEVCompute && jobvl != lapack.LeftEVNone: + panic(badLeftEVJob) + case jobvr != lapack.RightEVCompute && jobvr != lapack.RightEVNone: + panic(badRightEVJob) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case ldvl < 1 || (ldvl < n && wantvl): + panic(badLdVL) + case ldvr < 1 || (ldvr < n && wantvr): + panic(badLdVR) + case lwork < minwrk && lwork != -1: + panic(badLWork) + case len(work) < lwork: + panic(shortWork) + } + + // Quick return if possible. 
+ if n == 0 { + work[0] = 1 + return 0 + } + + maxwrk := 2*n + n*impl.Ilaenv(1, "DGEHRD", " ", n, 1, n, 0) + if wantvl || wantvr { + maxwrk = max(maxwrk, 2*n+(n-1)*impl.Ilaenv(1, "DORGHR", " ", n, 1, n, -1)) + impl.Dhseqr(lapack.EigenvaluesAndSchur, lapack.SchurOrig, n, 0, n-1, + a, lda, wr, wi, nil, n, work, -1) + maxwrk = max(maxwrk, max(n+1, n+int(work[0]))) + side := lapack.EVLeft + if wantvr { + side = lapack.EVRight + } + impl.Dtrevc3(side, lapack.EVAllMulQ, nil, n, a, lda, vl, ldvl, vr, ldvr, + n, work, -1) + maxwrk = max(maxwrk, n+int(work[0])) + maxwrk = max(maxwrk, 4*n) + } else { + impl.Dhseqr(lapack.EigenvaluesOnly, lapack.SchurNone, n, 0, n-1, + a, lda, wr, wi, vr, ldvr, work, -1) + maxwrk = max(maxwrk, max(n+1, n+int(work[0]))) + } + maxwrk = max(maxwrk, minwrk) + + if lwork == -1 { + work[0] = float64(maxwrk) + return 0 + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(wr) != n: + panic(badLenWr) + case len(wi) != n: + panic(badLenWi) + case len(vl) < (n-1)*ldvl+n && wantvl: + panic(shortVL) + case len(vr) < (n-1)*ldvr+n && wantvr: + panic(shortVR) + } + + // Get machine constants. + smlnum := math.Sqrt(dlamchS) / dlamchP + bignum := 1 / smlnum + + // Scale A if max element outside range [smlnum,bignum]. + anrm := impl.Dlange(lapack.MaxAbs, n, n, a, lda, nil) + var scalea bool + var cscale float64 + if 0 < anrm && anrm < smlnum { + scalea = true + cscale = smlnum + } else if anrm > bignum { + scalea = true + cscale = bignum + } + if scalea { + impl.Dlascl(lapack.General, 0, 0, anrm, cscale, n, n, a, lda) + } + + // Balance the matrix. + workbal := work[:n] + ilo, ihi := impl.Dgebal(lapack.PermuteScale, n, a, lda, workbal) + + // Reduce to upper Hessenberg form. + iwrk := 2 * n + tau := work[n : iwrk-1] + impl.Dgehrd(n, ilo, ihi, a, lda, tau, work[iwrk:], lwork-iwrk) + + var side lapack.EVSide + if wantvl { + side = lapack.EVLeft + // Copy Householder vectors to VL. + impl.Dlacpy(blas.Lower, n, n, a, lda, vl, ldvl) + // Generate orthogonal matrix in VL. + impl.Dorghr(n, ilo, ihi, vl, ldvl, tau, work[iwrk:], lwork-iwrk) + // Perform QR iteration, accumulating Schur vectors in VL. + iwrk = n + first = impl.Dhseqr(lapack.EigenvaluesAndSchur, lapack.SchurOrig, n, ilo, ihi, + a, lda, wr, wi, vl, ldvl, work[iwrk:], lwork-iwrk) + if wantvr { + // Want left and right eigenvectors. + // Copy Schur vectors to VR. + side = lapack.EVBoth + impl.Dlacpy(blas.All, n, n, vl, ldvl, vr, ldvr) + } + } else if wantvr { + side = lapack.EVRight + // Copy Householder vectors to VR. + impl.Dlacpy(blas.Lower, n, n, a, lda, vr, ldvr) + // Generate orthogonal matrix in VR. + impl.Dorghr(n, ilo, ihi, vr, ldvr, tau, work[iwrk:], lwork-iwrk) + // Perform QR iteration, accumulating Schur vectors in VR. + iwrk = n + first = impl.Dhseqr(lapack.EigenvaluesAndSchur, lapack.SchurOrig, n, ilo, ihi, + a, lda, wr, wi, vr, ldvr, work[iwrk:], lwork-iwrk) + } else { + // Compute eigenvalues only. + iwrk = n + first = impl.Dhseqr(lapack.EigenvaluesOnly, lapack.SchurNone, n, ilo, ihi, + a, lda, wr, wi, nil, 1, work[iwrk:], lwork-iwrk) + } + + if first > 0 { + if scalea { + // Undo scaling. + impl.Dlascl(lapack.General, 0, 0, cscale, anrm, n-first, 1, wr[first:], 1) + impl.Dlascl(lapack.General, 0, 0, cscale, anrm, n-first, 1, wi[first:], 1) + impl.Dlascl(lapack.General, 0, 0, cscale, anrm, ilo, 1, wr, 1) + impl.Dlascl(lapack.General, 0, 0, cscale, anrm, ilo, 1, wi, 1) + } + work[0] = float64(maxwrk) + return first + } + + if wantvl || wantvr { + // Compute left and/or right eigenvectors. 
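+ // Dtrevc3 overwrites the Schur vectors stored in VL/VR with the
+ // eigenvectors of the balanced matrix; the Dgebak calls below then
+ // undo the balancing so the vectors correspond to the original A.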
+ impl.Dtrevc3(side, lapack.EVAllMulQ, nil, n, + a, lda, vl, ldvl, vr, ldvr, n, work[iwrk:], lwork-iwrk) + } + bi := blas64.Implementation() + if wantvl { + // Undo balancing of left eigenvectors. + impl.Dgebak(lapack.PermuteScale, lapack.EVLeft, n, ilo, ihi, workbal, n, vl, ldvl) + // Normalize left eigenvectors and make largest component real. + for i, wii := range wi { + if wii < 0 { + continue + } + if wii == 0 { + scl := 1 / bi.Dnrm2(n, vl[i:], ldvl) + bi.Dscal(n, scl, vl[i:], ldvl) + continue + } + scl := 1 / impl.Dlapy2(bi.Dnrm2(n, vl[i:], ldvl), bi.Dnrm2(n, vl[i+1:], ldvl)) + bi.Dscal(n, scl, vl[i:], ldvl) + bi.Dscal(n, scl, vl[i+1:], ldvl) + for k := 0; k < n; k++ { + vi := vl[k*ldvl+i] + vi1 := vl[k*ldvl+i+1] + work[iwrk+k] = vi*vi + vi1*vi1 + } + k := bi.Idamax(n, work[iwrk:iwrk+n], 1) + cs, sn, _ := impl.Dlartg(vl[k*ldvl+i], vl[k*ldvl+i+1]) + bi.Drot(n, vl[i:], ldvl, vl[i+1:], ldvl, cs, sn) + vl[k*ldvl+i+1] = 0 + } + } + if wantvr { + // Undo balancing of right eigenvectors. + impl.Dgebak(lapack.PermuteScale, lapack.EVRight, n, ilo, ihi, workbal, n, vr, ldvr) + // Normalize right eigenvectors and make largest component real. + for i, wii := range wi { + if wii < 0 { + continue + } + if wii == 0 { + scl := 1 / bi.Dnrm2(n, vr[i:], ldvr) + bi.Dscal(n, scl, vr[i:], ldvr) + continue + } + scl := 1 / impl.Dlapy2(bi.Dnrm2(n, vr[i:], ldvr), bi.Dnrm2(n, vr[i+1:], ldvr)) + bi.Dscal(n, scl, vr[i:], ldvr) + bi.Dscal(n, scl, vr[i+1:], ldvr) + for k := 0; k < n; k++ { + vi := vr[k*ldvr+i] + vi1 := vr[k*ldvr+i+1] + work[iwrk+k] = vi*vi + vi1*vi1 + } + k := bi.Idamax(n, work[iwrk:iwrk+n], 1) + cs, sn, _ := impl.Dlartg(vr[k*ldvr+i], vr[k*ldvr+i+1]) + bi.Drot(n, vr[i:], ldvr, vr[i+1:], ldvr, cs, sn) + vr[k*ldvr+i+1] = 0 + } + } + + if scalea { + // Undo scaling. + impl.Dlascl(lapack.General, 0, 0, cscale, anrm, n-first, 1, wr[first:], 1) + impl.Dlascl(lapack.General, 0, 0, cscale, anrm, n-first, 1, wi[first:], 1) + } + + work[0] = float64(maxwrk) + return first +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgehd2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgehd2.go new file mode 100644 index 0000000000..64b0cb4028 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgehd2.go @@ -0,0 +1,105 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dgehd2 reduces a block of a general n×n matrix A to upper Hessenberg form H +// by an orthogonal similarity transformation Qᵀ * A * Q = H. +// +// The matrix Q is represented as a product of (ihi-ilo) elementary +// reflectors +// +// Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}. +// +// Each H_i has the form +// +// H_i = I - tau[i] * v * vᵀ +// +// where v is a real vector with v[0:i+1] = 0, v[i+1] = 1 and v[ihi+1:n] = 0. +// v[i+2:ihi+1] is stored on exit in A[i+2:ihi+1,i]. +// +// On entry, a contains the n×n general matrix to be reduced. On return, the +// upper triangle and the first subdiagonal of A are overwritten with the upper +// Hessenberg matrix H, and the elements below the first subdiagonal, with the +// slice tau, represent the orthogonal matrix Q as a product of elementary +// reflectors. +// +// The contents of A are illustrated by the following example, with n = 7, ilo = +// 1 and ihi = 5. 
+// On entry, +// +// [ a a a a a a a ] +// [ a a a a a a ] +// [ a a a a a a ] +// [ a a a a a a ] +// [ a a a a a a ] +// [ a a a a a a ] +// [ a ] +// +// on return, +// +// [ a a h h h h a ] +// [ a h h h h a ] +// [ h h h h h h ] +// [ v1 h h h h h ] +// [ v1 v2 h h h h ] +// [ v1 v2 v3 h h h ] +// [ a ] +// +// where a denotes an element of the original matrix A, h denotes a +// modified element of the upper Hessenberg matrix H, and vi denotes an +// element of the vector defining H_i. +// +// ilo and ihi determine the block of A that will be reduced to upper Hessenberg +// form. It must hold that 0 <= ilo <= ihi <= max(0, n-1), otherwise Dgehd2 will +// panic. +// +// On return, tau will contain the scalar factors of the elementary reflectors. +// It must have length equal to n-1, otherwise Dgehd2 will panic. +// +// work must have length at least n, otherwise Dgehd2 will panic. +// +// Dgehd2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgehd2(n, ilo, ihi int, a []float64, lda int, tau, work []float64) { + switch { + case n < 0: + panic(nLT0) + case ilo < 0 || max(0, n-1) < ilo: + panic(badIlo) + case ihi < min(ilo, n-1) || n <= ihi: + panic(badIhi) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. + if n == 0 { + return + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(tau) != n-1: + panic(badLenTau) + case len(work) < n: + panic(shortWork) + } + + for i := ilo; i < ihi; i++ { + // Compute elementary reflector H_i to annihilate A[i+2:ihi+1,i]. + var aii float64 + aii, tau[i] = impl.Dlarfg(ihi-i, a[(i+1)*lda+i], a[min(i+2, n-1)*lda+i:], lda) + a[(i+1)*lda+i] = 1 + + // Apply H_i to A[0:ihi+1,i+1:ihi+1] from the right. + impl.Dlarf(blas.Right, ihi+1, ihi-i, a[(i+1)*lda+i:], lda, tau[i], a[i+1:], lda, work) + + // Apply H_i to A[i+1:ihi+1,i+1:n] from the left. + impl.Dlarf(blas.Left, ihi-i, n-i-1, a[(i+1)*lda+i:], lda, tau[i], a[(i+1)*lda+i+1:], lda, work) + a[(i+1)*lda+i] = aii + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgehrd.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgehrd.go new file mode 100644 index 0000000000..ae1533029d --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgehrd.go @@ -0,0 +1,202 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dgehrd reduces a block of a real n×n general matrix A to upper Hessenberg +// form H by an orthogonal similarity transformation Qᵀ * A * Q = H. +// +// The matrix Q is represented as a product of (ihi-ilo) elementary +// reflectors +// +// Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}. +// +// Each H_i has the form +// +// H_i = I - tau[i] * v * vᵀ +// +// where v is a real vector with v[0:i+1] = 0, v[i+1] = 1 and v[ihi+1:n] = 0. +// v[i+2:ihi+1] is stored on exit in A[i+2:ihi+1,i]. +// +// On entry, a contains the n×n general matrix to be reduced. On return, the +// upper triangle and the first subdiagonal of A will be overwritten with the +// upper Hessenberg matrix H, and the elements below the first subdiagonal, with +// the slice tau, represent the orthogonal matrix Q as a product of elementary +// reflectors. +// +// The contents of a are illustrated by the following example, with n = 7, ilo = +// 1 and ihi = 5. 
+// On entry, +// +// [ a a a a a a a ] +// [ a a a a a a ] +// [ a a a a a a ] +// [ a a a a a a ] +// [ a a a a a a ] +// [ a a a a a a ] +// [ a ] +// +// on return, +// +// [ a a h h h h a ] +// [ a h h h h a ] +// [ h h h h h h ] +// [ v1 h h h h h ] +// [ v1 v2 h h h h ] +// [ v1 v2 v3 h h h ] +// [ a ] +// +// where a denotes an element of the original matrix A, h denotes a +// modified element of the upper Hessenberg matrix H, and vi denotes an +// element of the vector defining H_i. +// +// ilo and ihi determine the block of A that will be reduced to upper Hessenberg +// form. It must hold that 0 <= ilo <= ihi < n if n > 0, and ilo == 0 and ihi == +// -1 if n == 0, otherwise Dgehrd will panic. +// +// On return, tau will contain the scalar factors of the elementary reflectors. +// Elements tau[:ilo] and tau[ihi:] will be set to zero. tau must have length +// equal to n-1 if n > 0, otherwise Dgehrd will panic. +// +// work must have length at least lwork and lwork must be at least max(1,n), +// otherwise Dgehrd will panic. On return, work[0] contains the optimal value of +// lwork. +// +// If lwork == -1, instead of performing Dgehrd, only the optimal value of lwork +// will be stored in work[0]. +// +// Dgehrd is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgehrd(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) { + switch { + case n < 0: + panic(nLT0) + case ilo < 0 || max(0, n-1) < ilo: + panic(badIlo) + case ihi < min(ilo, n-1) || n <= ihi: + panic(badIhi) + case lda < max(1, n): + panic(badLdA) + case lwork < max(1, n) && lwork != -1: + panic(badLWork) + case len(work) < lwork: + panic(shortWork) + } + + // Quick return if possible. + if n == 0 { + work[0] = 1 + return + } + + const ( + nbmax = 64 + ldt = nbmax + 1 + tsize = ldt * nbmax + ) + // Compute the workspace requirements. + nb := min(nbmax, impl.Ilaenv(1, "DGEHRD", " ", n, ilo, ihi, -1)) + lwkopt := n*nb + tsize + if lwork == -1 { + work[0] = float64(lwkopt) + return + } + + if len(a) < (n-1)*lda+n { + panic(shortA) + } + if len(tau) != n-1 { + panic(badLenTau) + } + + // Set tau[:ilo] and tau[ihi:] to zero. + for i := 0; i < ilo; i++ { + tau[i] = 0 + } + for i := ihi; i < n-1; i++ { + tau[i] = 0 + } + + // Quick return if possible. + nh := ihi - ilo + 1 + if nh <= 1 { + work[0] = 1 + return + } + + // Determine the block size. + nbmin := 2 + var nx int + if 1 < nb && nb < nh { + // Determine when to cross over from blocked to unblocked code + // (last block is always handled by unblocked code). + nx = max(nb, impl.Ilaenv(3, "DGEHRD", " ", n, ilo, ihi, -1)) + if nx < nh { + // Determine if workspace is large enough for blocked code. + if lwork < n*nb+tsize { + // Not enough workspace to use optimal nb: + // determine the minimum value of nb, and reduce + // nb or force use of unblocked code. + nbmin = max(2, impl.Ilaenv(2, "DGEHRD", " ", n, ilo, ihi, -1)) + if lwork >= n*nbmin+tsize { + nb = (lwork - tsize) / n + } else { + nb = 1 + } + } + } + } + ldwork := nb // work is used as an n×nb matrix. + + var i int + if nb < nbmin || nh <= nb { + // Use unblocked code below. + i = ilo + } else { + // Use blocked code. + bi := blas64.Implementation() + iwt := n * nb // Size of the matrix Y and index where the matrix T starts in work. 
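+ // work is partitioned as [ Y | T ]: the first n*nb elements hold
+ // the n×nb matrix Y = A*V*T computed by Dlahr2, and the tsize
+ // elements starting at iwt hold the triangular factor T.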
+ for i = ilo; i < ihi-nx; i += nb { + ib := min(nb, ihi-i) + + // Reduce columns [i:i+ib] to Hessenberg form, returning the + // matrices V and T of the block reflector H = I - V*T*Vᵀ + // which performs the reduction, and also the matrix Y = A*V*T. + impl.Dlahr2(ihi+1, i+1, ib, a[i:], lda, tau[i:], work[iwt:], ldt, work, ldwork) + + // Apply the block reflector H to A[:ihi+1,i+ib:ihi+1] from the + // right, computing A := A - Y * Vᵀ. V[i+ib,i+ib-1] must be set + // to 1. + ei := a[(i+ib)*lda+i+ib-1] + a[(i+ib)*lda+i+ib-1] = 1 + bi.Dgemm(blas.NoTrans, blas.Trans, ihi+1, ihi-i-ib+1, ib, + -1, work, ldwork, + a[(i+ib)*lda+i:], lda, + 1, a[i+ib:], lda) + a[(i+ib)*lda+i+ib-1] = ei + + // Apply the block reflector H to A[0:i+1,i+1:i+ib-1] from the + // right. + bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, i+1, ib-1, + 1, a[(i+1)*lda+i:], lda, work, ldwork) + for j := 0; j <= ib-2; j++ { + bi.Daxpy(i+1, -1, work[j:], ldwork, a[i+j+1:], lda) + } + + // Apply the block reflector H to A[i+1:ihi+1,i+ib:n] from the + // left. + impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise, + ihi-i, n-i-ib, ib, + a[(i+1)*lda+i:], lda, work[iwt:], ldt, a[(i+1)*lda+i+ib:], lda, work, ldwork) + } + } + // Use unblocked code to reduce the rest of the matrix. + impl.Dgehd2(n, i, ihi, a, lda, tau, work) + work[0] = float64(lwkopt) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgelq2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgelq2.go new file mode 100644 index 0000000000..abc96f7d2a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgelq2.go @@ -0,0 +1,65 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dgelq2 computes the LQ factorization of the m×n matrix A. +// +// In an LQ factorization, L is a lower triangular m×n matrix, and Q is an n×n +// orthonormal matrix. +// +// a is modified to contain the information to construct L and Q. +// The lower triangle of a contains the matrix L. The upper triangular elements +// (not including the diagonal) contain the elementary reflectors. tau is modified +// to contain the reflector scales. tau must have length of at least k = min(m,n) +// and this function will panic otherwise. +// +// See Dgeqr2 for a description of the elementary reflectors and orthonormal +// matrix Q. Q is constructed as a product of these elementary reflectors, +// Q = H_{k-1} * ... * H_1 * H_0. +// +// work is temporary storage of length at least m and this function will panic otherwise. +// +// Dgelq2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgelq2(m, n int, a []float64, lda int, tau, work []float64) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. 
+ k := min(m, n) + if k == 0 { + return + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(tau) < k: + panic(shortTau) + case len(work) < m: + panic(shortWork) + } + + for i := 0; i < k; i++ { + a[i*lda+i], tau[i] = impl.Dlarfg(n-i, a[i*lda+i], a[i*lda+min(i+1, n-1):], 1) + if i < m-1 { + aii := a[i*lda+i] + a[i*lda+i] = 1 + impl.Dlarf(blas.Right, m-i-1, n-i, + a[i*lda+i:], 1, + tau[i], + a[(i+1)*lda+i:], lda, + work) + a[i*lda+i] = aii + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgelqf.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgelqf.go new file mode 100644 index 0000000000..f1fd13a019 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgelqf.go @@ -0,0 +1,97 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dgelqf computes the LQ factorization of the m×n matrix A using a blocked +// algorithm. See the documentation for Dgelq2 for a description of the +// parameters at entry and exit. +// +// work is temporary storage, and lwork specifies the usable memory length. +// At minimum, lwork >= m, and this function will panic otherwise. +// Dgelqf is a blocked LQ factorization, but the block size is limited +// by the temporary space available. If lwork == -1, instead of performing Dgelqf, +// the optimal work length will be stored into work[0]. +// +// tau must have length at least min(m,n), and this function will panic otherwise. +func (impl Implementation) Dgelqf(m, n int, a []float64, lda int, tau, work []float64, lwork int) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case lwork < max(1, m) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + k := min(m, n) + if k == 0 { + work[0] = 1 + return + } + + nb := impl.Ilaenv(1, "DGELQF", " ", m, n, -1, -1) + if lwork == -1 { + work[0] = float64(m * nb) + return + } + + if len(a) < (m-1)*lda+n { + panic(shortA) + } + if len(tau) < k { + panic(shortTau) + } + + // Find the optimal blocking size based on the size of available memory + // and optimal machine parameters. + nbmin := 2 + var nx int + iws := m + if 1 < nb && nb < k { + nx = max(0, impl.Ilaenv(3, "DGELQF", " ", m, n, -1, -1)) + if nx < k { + iws = m * nb + if lwork < iws { + nb = lwork / m + nbmin = max(2, impl.Ilaenv(2, "DGELQF", " ", m, n, -1, -1)) + } + } + } + ldwork := nb + // Computed blocked LQ factorization. + var i int + if nbmin <= nb && nb < k && nx < k { + for i = 0; i < k-nx; i += nb { + ib := min(k-i, nb) + impl.Dgelq2(ib, n-i, a[i*lda+i:], lda, tau[i:], work) + if i+ib < m { + impl.Dlarft(lapack.Forward, lapack.RowWise, n-i, ib, + a[i*lda+i:], lda, + tau[i:], + work, ldwork) + impl.Dlarfb(blas.Right, blas.NoTrans, lapack.Forward, lapack.RowWise, + m-i-ib, n-i, ib, + a[i*lda+i:], lda, + work, ldwork, + a[(i+ib)*lda+i:], lda, + work[ib*ldwork:], ldwork) + } + } + } + // Perform unblocked LQ factorization on the remainder. + if i < k { + impl.Dgelq2(m-i, n-i, a[i*lda+i:], lda, tau[i:], work) + } + work[0] = float64(iws) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgels.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgels.go new file mode 100644 index 0000000000..3018973a9e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgels.go @@ -0,0 +1,220 @@ +// Copyright ©2015 The Gonum Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dgels finds a minimum-norm solution based on the matrices A and B using the +// QR or LQ factorization. Dgels returns false if the matrix +// A is singular, and true if this solution was successfully found. +// +// The minimization problem solved depends on the input parameters. +// +// 1. If m >= n and trans == blas.NoTrans, Dgels finds X such that || A*X - B||_2 +// is minimized. +// 2. If m < n and trans == blas.NoTrans, Dgels finds the minimum norm solution of +// A * X = B. +// 3. If m >= n and trans == blas.Trans, Dgels finds the minimum norm solution of +// Aᵀ * X = B. +// 4. If m < n and trans == blas.Trans, Dgels finds X such that || A*X - B||_2 +// is minimized. +// +// Note that the least-squares solutions (cases 1 and 3) perform the minimization +// per column of B. This is not the same as finding the minimum-norm matrix. +// +// The matrix A is a general matrix of size m×n and is modified during this call. +// The input matrix B is of size max(m,n)×nrhs, and serves two purposes. On entry, +// the elements of b specify the input matrix B. B has size m×nrhs if +// trans == blas.NoTrans, and n×nrhs if trans == blas.Trans. On exit, the +// leading submatrix of b contains the solution vectors X. If trans == blas.NoTrans, +// this submatrix is of size n×nrhs, and of size m×nrhs otherwise. +// +// work is temporary storage, and lwork specifies the usable memory length. +// At minimum, lwork >= max(m,n) + max(m,n,nrhs), and this function will panic +// otherwise. A longer work will enable blocked algorithms to be called. +// In the special case that lwork == -1, work[0] will be set to the optimal working +// length. +func (impl Implementation) Dgels(trans blas.Transpose, m, n, nrhs int, a []float64, lda int, b []float64, ldb int, work []float64, lwork int) bool { + mn := min(m, n) + minwrk := mn + max(mn, nrhs) + switch { + case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: + panic(badTrans) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case nrhs < 0: + panic(nrhsLT0) + case lda < max(1, n): + panic(badLdA) + case ldb < max(1, nrhs): + panic(badLdB) + case lwork < max(1, minwrk) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. + if mn == 0 || nrhs == 0 { + impl.Dlaset(blas.All, max(m, n), nrhs, 0, 0, b, ldb) + work[0] = 1 + return true + } + + // Find optimal block size. + var nb int + if m >= n { + nb = impl.Ilaenv(1, "DGEQRF", " ", m, n, -1, -1) + if trans != blas.NoTrans { + nb = max(nb, impl.Ilaenv(1, "DORMQR", "LN", m, nrhs, n, -1)) + } else { + nb = max(nb, impl.Ilaenv(1, "DORMQR", "LT", m, nrhs, n, -1)) + } + } else { + nb = impl.Ilaenv(1, "DGELQF", " ", m, n, -1, -1) + if trans != blas.NoTrans { + nb = max(nb, impl.Ilaenv(1, "DORMLQ", "LT", n, nrhs, m, -1)) + } else { + nb = max(nb, impl.Ilaenv(1, "DORMLQ", "LN", n, nrhs, m, -1)) + } + } + wsize := max(1, mn+max(mn, nrhs)*nb) + work[0] = float64(wsize) + + if lwork == -1 { + return true + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(b) < (max(m, n)-1)*ldb+nrhs: + panic(shortB) + } + + // Scale the input matrices if they contain extreme values. 
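+ // Entries far outside [smlnum, bignum] are scaled into range to
+ // avoid overflow and damaging underflow during the factorization;
+ // iascl and ibscl record what was done so that the solution can be
+ // scaled back at the end.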
+	smlnum := dlamchS / dlamchP
+	bignum := 1 / smlnum
+	anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil)
+	var iascl int
+	if anrm > 0 && anrm < smlnum {
+		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
+		iascl = 1
+	} else if anrm > bignum {
+		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
+		iascl = 2
+	} else if anrm == 0 {
+		// Matrix is all zeros.
+		impl.Dlaset(blas.All, max(m, n), nrhs, 0, 0, b, ldb)
+		return true
+	}
+	brow := m
+	if trans != blas.NoTrans {
+		brow = n
+	}
+	bnrm := impl.Dlange(lapack.MaxAbs, brow, nrhs, b, ldb, nil)
+	ibscl := 0
+	if bnrm > 0 && bnrm < smlnum {
+		impl.Dlascl(lapack.General, 0, 0, bnrm, smlnum, brow, nrhs, b, ldb)
+		ibscl = 1
+	} else if bnrm > bignum {
+		impl.Dlascl(lapack.General, 0, 0, bnrm, bignum, brow, nrhs, b, ldb)
+		ibscl = 2
+	}
+
+	// Solve the minimization problem using a QR or an LQ decomposition.
+	var scllen int
+	if m >= n {
+		impl.Dgeqrf(m, n, a, lda, work[:n], work[mn:], lwork-mn)
+		if trans == blas.NoTrans {
+			impl.Dormqr(blas.Left, blas.Trans, m, nrhs, n,
+				a, lda,
+				work[:n],
+				b, ldb,
+				work[mn:], lwork-mn)
+			ok := impl.Dtrtrs(blas.Upper, blas.NoTrans, blas.NonUnit, n, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			scllen = n
+		} else {
+			ok := impl.Dtrtrs(blas.Upper, blas.Trans, blas.NonUnit, n, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			for i := n; i < m; i++ {
+				for j := 0; j < nrhs; j++ {
+					b[i*ldb+j] = 0
+				}
+			}
+			impl.Dormqr(blas.Left, blas.NoTrans, m, nrhs, n,
+				a, lda,
+				work[:n],
+				b, ldb,
+				work[mn:], lwork-mn)
+			scllen = m
+		}
+	} else {
+		impl.Dgelqf(m, n, a, lda, work, work[mn:], lwork-mn)
+		if trans == blas.NoTrans {
+			ok := impl.Dtrtrs(blas.Lower, blas.NoTrans, blas.NonUnit,
+				m, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			for i := m; i < n; i++ {
+				for j := 0; j < nrhs; j++ {
+					b[i*ldb+j] = 0
+				}
+			}
+			impl.Dormlq(blas.Left, blas.Trans, n, nrhs, m,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			scllen = n
+		} else {
+			impl.Dormlq(blas.Left, blas.NoTrans, n, nrhs, m,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			ok := impl.Dtrtrs(blas.Lower, blas.Trans, blas.NonUnit,
+				m, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			scllen = m
+		}
+	}
+
+	// Adjust answer vector based on scaling.
+	if iascl == 1 {
+		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, scllen, nrhs, b, ldb)
+	}
+	if iascl == 2 {
+		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, scllen, nrhs, b, ldb)
+	}
+	if ibscl == 1 {
+		impl.Dlascl(lapack.General, 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb)
+	}
+	if ibscl == 2 {
+		impl.Dlascl(lapack.General, 0, 0, bignum, bnrm, scllen, nrhs, b, ldb)
+	}
+
+	work[0] = float64(wsize)
+	return true
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgeql2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeql2.go
new file mode 100644
index 0000000000..d18989d274
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeql2.go
@@ -0,0 +1,67 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dgeql2 computes the QL factorization of the m×n matrix A. That is, Dgeql2
+// computes Q and L such that
+//
+//	A = Q * L
+//
+// where Q is an m×m orthonormal matrix and L is a lower trapezoidal matrix.
+//
+// Q is represented as a product of elementary reflectors,
+//
+//	Q = H_{k-1} * ...
* H_1 * H_0 +// +// where k = min(m,n) and each H_i has the form +// +// H_i = I - tau[i] * v_i * v_iᵀ +// +// Vector v_i has v[m-k+i+1:m] = 0, v[m-k+i] = 1, and v[:m-k+i+1] is stored on +// exit in A[0:m-k+i-1, n-k+i]. +// +// tau must have length at least min(m,n), and Dgeql2 will panic otherwise. +// +// work is temporary memory storage and must have length at least n. +// +// Dgeql2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgeql2(m, n int, a []float64, lda int, tau, work []float64) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. + k := min(m, n) + if k == 0 { + return + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(tau) < k: + panic(shortTau) + case len(work) < n: + panic(shortWork) + } + + var aii float64 + for i := k - 1; i >= 0; i-- { + // Generate elementary reflector H_i to annihilate A[0:m-k+i-1, n-k+i]. + aii, tau[i] = impl.Dlarfg(m-k+i+1, a[(m-k+i)*lda+n-k+i], a[n-k+i:], lda) + + // Apply H_i to A[0:m-k+i, 0:n-k+i-1] from the left. + a[(m-k+i)*lda+n-k+i] = 1 + impl.Dlarf(blas.Left, m-k+i+1, n-k+i, a[n-k+i:], lda, tau[i], a, lda, work) + a[(m-k+i)*lda+n-k+i] = aii + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqp3.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqp3.go new file mode 100644 index 0000000000..da8cd4fa76 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqp3.go @@ -0,0 +1,195 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dgeqp3 computes a QR factorization with column pivoting of the m×n matrix A: +// +// A*P = Q*R +// +// where P is a permutation matrix, Q is an orthogonal matrix and R is a +// min(m,n)×n upper trapezoidal matrix. +// +// On return, the upper triangle of A contains the matrix R. The elements below +// the diagonal together with tau represent the matrix Q as a product of +// elementary reflectors +// +// Q = H_0 * H_1 * ... * H_{k-1}, where k = min(m,n). +// +// Each H_i has the form +// +// H_i = I - tau * v * vᵀ +// +// where tau is a scalar and v is a vector with v[0:i] = 0 and v[i] = 1; +// v[i+1:m] is stored on exit in A[i+1:m,i], and tau in tau[i]. +// +// jpvt specifies a column pivot to be applied to A. On entry, if jpvt[j] is at +// least zero, the jth column of A is permuted to the front of A*P (a leading +// column), if jpvt[j] is -1 the jth column of A is a free column. If jpvt[j] < +// -1, Dgeqp3 will panic. On return, jpvt holds the permutation that was +// applied; the jth column of A*P was the jpvt[j] column of A. jpvt must have +// length n or Dgeqp3 will panic. +// +// tau holds the scalar factors of the elementary reflectors. It must have +// length min(m,n), otherwise Dgeqp3 will panic. +// +// work must have length at least max(1,lwork), and lwork must be at least +// 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must be at +// least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return, work[0] +// will contain the optimal value of lwork. +// +// If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork +// will be stored in work[0]. +// +// Dgeqp3 is an internal routine. It is exported for testing purposes. 
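A usage sketch may help before the implementation; it is illustrative only and not part of the vendored file. It assumes a row-major m×n matrix a with leading dimension lda in scope and marks every column as free, as described above:

	impl := gonum.Implementation{}
	jpvt := make([]int, n)
	for j := range jpvt {
		jpvt[j] = -1 // -1 marks column j as a free column
	}
	tau := make([]float64, min(m, n)) // min is the Go 1.21+ builtin
	work := make([]float64, 1)
	impl.Dgeqp3(m, n, a, lda, jpvt, tau, work, -1) // workspace query only
	work = make([]float64, int(work[0]))
	impl.Dgeqp3(m, n, a, lda, jpvt, tau, work, len(work))
	// On return, column j of A*P was column jpvt[j] of the original A.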
+func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) { + const ( + inb = 1 + inbmin = 2 + ixover = 3 + ) + + minmn := min(m, n) + iws := 3*n + 1 + if minmn == 0 { + iws = 1 + } + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case lwork < iws && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. + if minmn == 0 { + work[0] = 1 + return + } + + nb := impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1) + if lwork == -1 { + work[0] = float64(2*n + (n+1)*nb) + return + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(jpvt) != n: + panic(badLenJpvt) + case len(tau) < minmn: + panic(shortTau) + } + + for _, v := range jpvt { + if v < -1 || n <= v { + panic(badJpvt) + } + } + + bi := blas64.Implementation() + + // Move initial columns up front. + var nfxd int + for j := 0; j < n; j++ { + if jpvt[j] == -1 { + jpvt[j] = j + continue + } + if j != nfxd { + bi.Dswap(m, a[j:], lda, a[nfxd:], lda) + jpvt[j], jpvt[nfxd] = jpvt[nfxd], j + } else { + jpvt[j] = j + } + nfxd++ + } + + // Factorize nfxd columns. + // + // Compute the QR factorization of nfxd columns and update remaining columns. + if nfxd > 0 { + na := min(m, nfxd) + impl.Dgeqrf(m, na, a, lda, tau[:na], work, lwork) + iws = max(iws, int(work[0])) + if na < n { + impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda, + work, lwork) + iws = max(iws, int(work[0])) + } + } + + if nfxd >= minmn { + work[0] = float64(iws) + return + } + + // Factorize free columns. + sm := m - nfxd + sn := n - nfxd + sminmn := minmn - nfxd + + // Determine the block size. + nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1) + nbmin := 2 + nx := 0 + + if 1 < nb && nb < sminmn { + // Determine when to cross over from blocked to unblocked code. + nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1)) + + if nx < sminmn { + // Determine if workspace is large enough for blocked code. + minws := 2*sn + (sn+1)*nb + iws = max(iws, minws) + if lwork < minws { + // Not enough workspace to use optimal nb. Reduce + // nb and determine the minimum value of nb. + nb = (lwork - 2*sn) / (sn + 1) + nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1)) + } + } + } + + // Initialize partial column norms. + // The first n elements of work store the exact column norms. + for j := nfxd; j < n; j++ { + work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda) + work[n+j] = work[j] + } + j := nfxd + if nbmin <= nb && nb < sminmn && nx < sminmn { + // Use blocked code initially. + + // Compute factorization. + var fjb int + for topbmn := minmn - nx; j < topbmn; j += fjb { + jb := min(nb, topbmn-j) + + // Factorize jb columns among columns j:n. + fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:], + work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb) + } + } + + // Use unblocked code to factor the last or only block. + if j < minmn { + impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:], + work[j:n], work[j+n:2*n], work[2*n:]) + } + + work[0] = float64(iws) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqr2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqr2.go new file mode 100644 index 0000000000..4d1a4b3b0c --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqr2.go @@ -0,0 +1,78 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dgeqr2 computes a QR factorization of the m×n matrix A. +// +// In a QR factorization, Q is an m×m orthonormal matrix, and R is an +// upper triangular m×n matrix. +// +// A is modified to contain the information to construct Q and R. +// The upper triangle of a contains the matrix R. The lower triangular elements +// (not including the diagonal) contain the elementary reflectors. tau is modified +// to contain the reflector scales. tau must have length min(m,n), and +// this function will panic otherwise. +// +// The ith elementary reflector can be explicitly constructed by first extracting +// the +// +// v[j] = 0 j < i +// v[j] = 1 j == i +// v[j] = a[j*lda+i] j > i +// +// and computing H_i = I - tau[i] * v * vᵀ. +// +// The orthonormal matrix Q can be constructed from a product of these elementary +// reflectors, Q = H_0 * H_1 * ... * H_{k-1}, where k = min(m,n). +// +// work is temporary storage of length at least n and this function will panic otherwise. +// +// Dgeqr2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgeqr2(m, n int, a []float64, lda int, tau, work []float64) { + // TODO(btracey): This is oriented such that columns of a are eliminated. + // This likely could be re-arranged to take better advantage of row-major + // storage. + + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case len(work) < n: + panic(shortWork) + } + + // Quick return if possible. + k := min(m, n) + if k == 0 { + return + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(tau) != k: + panic(badLenTau) + } + + for i := 0; i < k; i++ { + // Generate elementary reflector H_i. + a[i*lda+i], tau[i] = impl.Dlarfg(m-i, a[i*lda+i], a[min((i+1), m-1)*lda+i:], lda) + if i < n-1 { + aii := a[i*lda+i] + a[i*lda+i] = 1 + impl.Dlarf(blas.Left, m-i, n-i-1, + a[i*lda+i:], lda, + tau[i], + a[i*lda+i+1:], lda, + work) + a[i*lda+i] = aii + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqrf.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqrf.go new file mode 100644 index 0000000000..2bcbde586c --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqrf.go @@ -0,0 +1,108 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dgeqrf computes the QR factorization of the m×n matrix A using a blocked +// algorithm. See the documentation for Dgeqr2 for a description of the +// parameters at entry and exit. +// +// work is temporary storage, and lwork specifies the usable memory length. +// The length of work must be at least max(1, lwork) and lwork must be -1 +// or at least n, otherwise this function will panic. +// Dgeqrf is a blocked QR factorization, but the block size is limited +// by the temporary space available. If lwork == -1, instead of performing Dgeqrf, +// the optimal work length will be stored into work[0]. +// +// tau must have length min(m,n), and this function will panic otherwise. 
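To make the packed-reflector convention of Dgeqr2 above concrete, here is a small illustrative helper, not part of the vendored code (which applies reflectors through Dlarf and Dlarfb): it expands H_i = I - tau[i] * v * vᵀ from column i of the factored matrix and applies it to a vector x of length m.

	// applyReflector overwrites x with H_i * x, reading v from column i of
	// the factored matrix a (row-major, leading dimension lda).
	func applyReflector(m, i int, a []float64, lda int, taui float64, x []float64) {
		v := make([]float64, m)
		v[i] = 1 // v[j] = 0 for j < i, v[i] = 1, v[j] = a[j*lda+i] for j > i
		for j := i + 1; j < m; j++ {
			v[j] = a[j*lda+i]
		}
		var dot float64 // dot = vᵀ * x over the nonzero tail of v
		for j := i; j < m; j++ {
			dot += v[j] * x[j]
		}
		for j := i; j < m; j++ {
			x[j] -= taui * dot * v[j] // x <- x - tau[i] * (vᵀ x) * v
		}
	}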
+func (impl Implementation) Dgeqrf(m, n int, a []float64, lda int, tau, work []float64, lwork int) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case lwork < max(1, n) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. + k := min(m, n) + if k == 0 { + work[0] = 1 + return + } + + // nb is the optimal blocksize, i.e. the number of columns transformed at a time. + nb := impl.Ilaenv(1, "DGEQRF", " ", m, n, -1, -1) + if lwork == -1 { + work[0] = float64(n * nb) + return + } + + if len(a) < (m-1)*lda+n { + panic(shortA) + } + if len(tau) != k { + panic(badLenTau) + } + + nbmin := 2 // Minimal block size. + var nx int // Use unblocked (unless changed in the next for loop) + iws := n + // Only consider blocked if the suggested block size is > 1 and the + // number of rows or columns is sufficiently large. + if 1 < nb && nb < k { + // nx is the block size at which the code switches from blocked + // to unblocked. + nx = max(0, impl.Ilaenv(3, "DGEQRF", " ", m, n, -1, -1)) + if k > nx { + iws = n * nb + if lwork < iws { + // Not enough workspace to use the optimal block + // size. Get the minimum block size instead. + nb = lwork / n + nbmin = max(2, impl.Ilaenv(2, "DGEQRF", " ", m, n, -1, -1)) + } + } + } + + // Compute QR using a blocked algorithm. + var i int + if nbmin <= nb && nb < k && nx < k { + ldwork := nb + for i = 0; i < k-nx; i += nb { + ib := min(k-i, nb) + // Compute the QR factorization of the current block. + impl.Dgeqr2(m-i, ib, a[i*lda+i:], lda, tau[i:i+ib], work) + if i+ib < n { + // Form the triangular factor of the block reflector and apply Hᵀ + // In Dlarft, work becomes the T matrix. + impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib, + a[i*lda+i:], lda, + tau[i:], + work, ldwork) + impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise, + m-i, n-i-ib, ib, + a[i*lda+i:], lda, + work, ldwork, + a[i*lda+i+ib:], lda, + work[ib*ldwork:], ldwork) + } + } + } + // Call unblocked code on the remaining columns. + if i < k { + impl.Dgeqr2(m-i, n-i, a[i*lda+i:], lda, tau[i:], work) + } + work[0] = float64(iws) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgerq2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgerq2.go new file mode 100644 index 0000000000..44ca1bc1a0 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgerq2.go @@ -0,0 +1,74 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dgerq2 computes an RQ factorization of the m×n matrix A, +// +// A = R * Q. +// +// On exit, if m <= n, the upper triangle of the subarray +// A[0:m, n-m:n] contains the m×m upper triangular matrix R. +// If m >= n, the elements on and above the (m-n)-th subdiagonal +// contain the m×n upper trapezoidal matrix R. +// The remaining elements, with tau, represent the +// orthogonal matrix Q as a product of min(m,n) elementary +// reflectors. +// +// The matrix Q is represented as a product of elementary reflectors +// +// Q = H_0 H_1 . . . H_{min(m,n)-1}. +// +// Each H(i) has the form +// +// H_i = I - tau_i * v * vᵀ +// +// where v is a vector with v[0:n-k+i-1] stored in A[m-k+i, 0:n-k+i-1], +// v[n-k+i:n] = 0 and v[n-k+i] = 1. +// +// tau must have length min(m,n) and work must have length m, otherwise +// Dgerq2 will panic. 
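As an illustration of the layout described above (again, not part of the vendored file): for m <= n, the upper triangular factor R can be copied out of the trailing m columns of the factored matrix.

	// Extract the m×m upper triangular R from A[0:m, n-m:n] after an RQ
	// factorization of a row-major matrix a with leading dimension lda.
	r := make([]float64, m*m)
	for i := 0; i < m; i++ {
		for j := i; j < m; j++ {
			r[i*m+j] = a[i*lda+(n-m)+j]
		}
	}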
+// +// Dgerq2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgerq2(m, n int, a []float64, lda int, tau, work []float64) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case len(work) < m: + panic(shortWork) + } + + // Quick return if possible. + k := min(m, n) + if k == 0 { + return + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(tau) < k: + panic(shortTau) + } + + for i := k - 1; i >= 0; i-- { + // Generate elementary reflector H[i] to annihilate + // A[m-k+i, 0:n-k+i-1]. + mki := m - k + i + nki := n - k + i + var aii float64 + aii, tau[i] = impl.Dlarfg(nki+1, a[mki*lda+nki], a[mki*lda:], 1) + + // Apply H[i] to A[0:m-k+i-1, 0:n-k+i] from the right. + a[mki*lda+nki] = 1 + impl.Dlarf(blas.Right, mki, nki+1, a[mki*lda:], 1, tau[i], a, lda, work) + a[mki*lda+nki] = aii + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgerqf.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgerqf.go new file mode 100644 index 0000000000..fe010b4792 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgerqf.go @@ -0,0 +1,135 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dgerqf computes an RQ factorization of the m×n matrix A, +// +// A = R * Q. +// +// On exit, if m <= n, the upper triangle of the subarray +// A[0:m, n-m:n] contains the m×m upper triangular matrix R. +// If m >= n, the elements on and above the (m-n)-th subdiagonal +// contain the m×n upper trapezoidal matrix R. +// The remaining elements, with tau, represent the +// orthogonal matrix Q as a product of min(m,n) elementary +// reflectors. +// +// The matrix Q is represented as a product of elementary reflectors +// +// Q = H_0 H_1 . . . H_{min(m,n)-1}. +// +// Each H(i) has the form +// +// H_i = I - tau_i * v * vᵀ +// +// where v is a vector with v[0:n-k+i-1] stored in A[m-k+i, 0:n-k+i-1], +// v[n-k+i:n] = 0 and v[n-k+i] = 1. +// +// tau must have length min(m,n), work must have length max(1, lwork), +// and lwork must be -1 or at least max(1, m), otherwise Dgerqf will panic. +// On exit, work[0] will contain the optimal length for work. +// +// Dgerqf is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgerqf(m, n int, a []float64, lda int, tau, work []float64, lwork int) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case lwork < max(1, m) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. + k := min(m, n) + if k == 0 { + work[0] = 1 + return + } + + nb := impl.Ilaenv(1, "DGERQF", " ", m, n, -1, -1) + if lwork == -1 { + work[0] = float64(m * nb) + return + } + + if len(a) < (m-1)*lda+n { + panic(shortA) + } + if len(tau) != k { + panic(badLenTau) + } + + nbmin := 2 + nx := 1 + iws := m + var ldwork int + if 1 < nb && nb < k { + // Determine when to cross over from blocked to unblocked code. + nx = max(0, impl.Ilaenv(3, "DGERQF", " ", m, n, -1, -1)) + if nx < k { + // Determine whether workspace is large enough for blocked code. + iws = m * nb + if lwork < iws { + // Not enough workspace to use optimal nb. Reduce + // nb and determine the minimum value of nb. 
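Concretely, with hypothetical sizes: for m = 1000 and an optimal nb = 32 the blocked path wants iws = m*nb = 32000 scratch values; if the caller supplied only lwork = 8000, nb is reduced to lwork/m = 8, and if that falls below the nbmin reported by Ilaenv the routine drops through to the unblocked Dgerq2 path instead.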
+ nb = lwork / m + nbmin = max(2, impl.Ilaenv(2, "DGERQF", " ", m, n, -1, -1)) + } + ldwork = nb + } + } + + var mu, nu int + if nbmin <= nb && nb < k && nx < k { + // Use blocked code initially. + // The last kk rows are handled by the block method. + ki := ((k - nx - 1) / nb) * nb + kk := min(k, ki+nb) + + var i int + for i = k - kk + ki; i >= k-kk; i -= nb { + ib := min(k-i, nb) + + // Compute the RQ factorization of the current block + // A[m-k+i:m-k+i+ib-1, 0:n-k+i+ib-1]. + impl.Dgerq2(ib, n-k+i+ib, a[(m-k+i)*lda:], lda, tau[i:], work) + if m-k+i > 0 { + // Form the triangular factor of the block reflector + // H = H_{i+ib-1} . . . H_{i+1} H_i. + impl.Dlarft(lapack.Backward, lapack.RowWise, + n-k+i+ib, ib, a[(m-k+i)*lda:], lda, tau[i:], + work, ldwork) + + // Apply H to A[0:m-k+i-1, 0:n-k+i+ib-1] from the right. + impl.Dlarfb(blas.Right, blas.NoTrans, lapack.Backward, lapack.RowWise, + m-k+i, n-k+i+ib, ib, a[(m-k+i)*lda:], lda, + work, ldwork, + a, lda, + work[ib*ldwork:], ldwork) + } + } + mu = m - k + i + nb + nu = n - k + i + nb + } else { + mu = m + nu = n + } + + // Use unblocked code to factor the last or only block. + if mu > 0 && nu > 0 { + impl.Dgerq2(mu, nu, a, lda, tau, work) + } + work[0] = float64(iws) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgesc2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgesc2.go new file mode 100644 index 0000000000..b2201085c5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgesc2.go @@ -0,0 +1,93 @@ +// Copyright ©2021 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" +) + +// Dgesc2 solves a system of linear equations +// +// A * x = scale * b +// +// with a general n×n matrix A represented by the LU factorization with complete +// pivoting +// +// A = P * L * U * Q +// +// as computed by Dgetc2. +// +// On entry, rhs contains the right hand side vector b. On return, it is +// overwritten with the solution vector x. +// +// Dgesc2 returns a scale factor +// +// 0 <= scale <= 1 +// +// chosen to prevent overflow in the solution. +// +// Dgesc2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgesc2(n int, a []float64, lda int, rhs []float64, ipiv, jpiv []int) (scale float64) { + switch { + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. + if n == 0 { + return 0 + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(rhs) < n: + panic(shortRHS) + case len(ipiv) != n: + panic(badLenIpiv) + case len(jpiv) != n: + panic(badLenJpiv) + } + + const smlnum = dlamchS / dlamchP + + // Apply permutations ipiv to rhs. + impl.Dlaswp(1, rhs, 1, 0, n-1, ipiv[:n], 1) + + // Solve for L part. + for i := 0; i < n-1; i++ { + for j := i + 1; j < n; j++ { + rhs[j] -= float64(a[j*lda+i] * rhs[i]) + } + } + + // Check for scaling. + scale = 1.0 + bi := blas64.Implementation() + i := bi.Idamax(n, rhs, 1) + if 2*smlnum*math.Abs(rhs[i]) > math.Abs(a[(n-1)*lda+(n-1)]) { + temp := 0.5 / math.Abs(rhs[i]) + bi.Dscal(n, temp, rhs, 1) + scale *= temp + } + + // Solve for U part. + for i := n - 1; i >= 0; i-- { + temp := 1.0 / a[i*lda+i] + rhs[i] *= temp + for j := i + 1; j < n; j++ { + rhs[i] -= float64(rhs[j] * (a[i*lda+j] * temp)) + } + } + + // Apply permutations jpiv to the solution (rhs). 
+	impl.Dlaswp(1, rhs, 1, 0, n-1, jpiv[:n], -1)
+
+	return scale
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgesv.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgesv.go
new file mode 100644
index 0000000000..0be4414ca1
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgesv.go
@@ -0,0 +1,60 @@
+// Copyright ©2021 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dgesv computes the solution to a real system of linear equations
+//
+//	A * X = B
+//
+// where A is an n×n matrix and X and B are n×nrhs matrices.
+//
+// The LU decomposition with partial pivoting and row interchanges is used to
+// factor A as
+//
+//	A = P * L * U
+//
+// where P is a permutation matrix, L is unit lower triangular, and U is upper
+// triangular. On return, the factors L and U are stored in a; the unit diagonal
+// elements of L are not stored. The row pivot indices that define the
+// permutation matrix P are stored in ipiv.
+//
+// The factored form of A is then used to solve the system of equations A * X =
+// B. On entry, b contains the right hand side matrix B. On return, if ok is
+// true, b contains the solution matrix X.
+func (impl Implementation) Dgesv(n, nrhs int, a []float64, lda int, ipiv []int, b []float64, ldb int) (ok bool) {
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case nrhs < 0:
+		panic(nrhsLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldb < max(1, nrhs):
+		panic(badLdB)
+	}
+
+	// Quick return if possible.
+	if n == 0 || nrhs == 0 {
+		return true
+	}
+
+	switch {
+	case len(a) < (n-1)*lda+n:
+		panic(shortA)
+	case len(ipiv) != n:
+		panic(badLenIpiv)
+	case len(b) < (n-1)*ldb+nrhs:
+		panic(shortB)
+	}
+
+	ok = impl.Dgetrf(n, n, a, lda, ipiv)
+	if ok {
+		impl.Dgetrs(blas.NoTrans, n, nrhs, a, lda, ipiv, b, ldb)
+	}
+
+	return ok
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgesvd.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgesvd.go
new file mode 100644
index 0000000000..97da749bfb
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgesvd.go
@@ -0,0 +1,1378 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+const noSVDO = "dgesvd: not coded for overwrite"
+
+// Dgesvd computes the singular value decomposition of the input matrix A.
+//
+// The singular value decomposition is
+//
+//	A = U * Sigma * Vᵀ
+//
+// where Sigma is an m×n diagonal matrix containing the singular values of A,
+// U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
+// min(m,n) columns of U and V are the left and right singular vectors of A
+// respectively.
+//
+// jobU and jobVT are options for computing the singular vectors. The behavior
+// is as follows
+//
+//	jobU == lapack.SVDAll       All m columns of U are returned in u
+//	jobU == lapack.SVDStore     The first min(m,n) columns are returned in u
+//	jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
+//	jobU == lapack.SVDNone      The columns of U are not computed.
+//
+// The behavior is the same for jobVT and the rows of Vᵀ. At most one of jobU
+// and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
+//
+// On entry, a contains the data for the m×n matrix A.
During the call to Dgesvd the data is overwritten. On exit, A contains the appropriate singular vectors if either job is lapack.SVDOverwrite.
+//
+// s is a slice of length at least min(m,n) and on exit contains the singular
+// values in decreasing order.
+//
+// u contains the left singular vectors on exit, stored column-wise. If
+// jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDStore u is
+// of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
+// not used.
+//
+// vt contains the right singular vectors on exit, stored row-wise. If
+// jobVT == lapack.SVDAll, vt is of size n×n. If jobVT == lapack.SVDStore vt is
+// of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
+// not used.
+//
+// work is a slice for storing temporary memory, and lwork is the usable size of
+// the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
+// If lwork == -1, instead of performing Dgesvd, the optimal work length will be
+// stored into work[0]. Dgesvd will panic if the working memory has insufficient
+// storage.
+//
+// Dgesvd returns whether the decomposition successfully completed.
+func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
+	if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
+		panic(noSVDO)
+	}
+
+	wantua := jobU == lapack.SVDAll
+	wantus := jobU == lapack.SVDStore
+	wantuas := wantua || wantus
+	wantuo := jobU == lapack.SVDOverwrite
+	wantun := jobU == lapack.SVDNone
+	if !(wantua || wantus || wantuo || wantun) {
+		panic(badSVDJob)
+	}
+
+	wantva := jobVT == lapack.SVDAll
+	wantvs := jobVT == lapack.SVDStore
+	wantvas := wantva || wantvs
+	wantvo := jobVT == lapack.SVDOverwrite
+	wantvn := jobVT == lapack.SVDNone
+	if !(wantva || wantvs || wantvo || wantvn) {
+		panic(badSVDJob)
+	}
+
+	if wantuo && wantvo {
+		panic(bothSVDOver)
+	}
+
+	minmn := min(m, n)
+	minwork := 1
+	if minmn > 0 {
+		minwork = max(3*minmn+max(m, n), 5*minmn)
+	}
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldu < 1, wantua && ldu < m, wantus && ldu < minmn:
+		panic(badLdU)
+	case ldvt < 1 || (wantvas && ldvt < n):
+		panic(badLdVT)
+	case lwork < minwork && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	if minmn == 0 {
+		work[0] = 1
+		return true
+	}
+
+	// Compute optimal workspace size for subroutines.
+ opts := string(jobU) + string(jobVT) + mnthr := impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0) + maxwrk := 1 + var wrkbl, bdspac int + if m >= n { + bdspac = 5 * n + impl.Dgeqrf(m, n, a, lda, nil, work, -1) + lwork_dgeqrf := int(work[0]) + + impl.Dorgqr(m, n, n, a, lda, nil, work, -1) + lwork_dorgqr_n := int(work[0]) + impl.Dorgqr(m, m, n, a, lda, nil, work, -1) + lwork_dorgqr_m := int(work[0]) + + impl.Dgebrd(n, n, a, lda, s, nil, nil, nil, work, -1) + lwork_dgebrd := int(work[0]) + + impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, nil, work, -1) + lwork_dorgbr_p := int(work[0]) + + impl.Dorgbr(lapack.GenerateQ, n, n, n, a, lda, nil, work, -1) + lwork_dorgbr_q := int(work[0]) + + if m >= mnthr { + if wantun { + // Path 1 (m much larger than n, jobU == None) + maxwrk = n + lwork_dgeqrf + maxwrk = max(maxwrk, 3*n+lwork_dgebrd) + if wantvo || wantvas { + maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p) + } + maxwrk = max(maxwrk, bdspac) + } else if wantuo && wantvn { + // Path 2 (m much larger than n, jobU == Overwrite, jobVT == None) + wrkbl = n + lwork_dgeqrf + wrkbl = max(wrkbl, n+lwork_dorgqr_n) + wrkbl = max(wrkbl, 3*n+lwork_dgebrd) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) + wrkbl = max(wrkbl, bdspac) + maxwrk = max(n*n+wrkbl, n*n+m*n+n) + } else if wantuo && wantvas { + // Path 3 (m much larger than n, jobU == Overwrite, jobVT == Store or All) + wrkbl = n + lwork_dgeqrf + wrkbl = max(wrkbl, n+lwork_dorgqr_n) + wrkbl = max(wrkbl, 3*n+lwork_dgebrd) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) + wrkbl = max(wrkbl, bdspac) + maxwrk = max(n*n+wrkbl, n*n+m*n+n) + } else if wantus && wantvn { + // Path 4 (m much larger than n, jobU == Store, jobVT == None) + wrkbl = n + lwork_dgeqrf + wrkbl = max(wrkbl, n+lwork_dorgqr_n) + wrkbl = max(wrkbl, 3*n+lwork_dgebrd) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) + wrkbl = max(wrkbl, bdspac) + maxwrk = n*n + wrkbl + } else if wantus && wantvo { + // Path 5 (m much larger than n, jobU == Store, jobVT == Overwrite) + wrkbl = n + lwork_dgeqrf + wrkbl = max(wrkbl, n+lwork_dorgqr_n) + wrkbl = max(wrkbl, 3*n+lwork_dgebrd) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) + wrkbl = max(wrkbl, bdspac) + maxwrk = 2*n*n + wrkbl + } else if wantus && wantvas { + // Path 6 (m much larger than n, jobU == Store, jobVT == Store or All) + wrkbl = n + lwork_dgeqrf + wrkbl = max(wrkbl, n+lwork_dorgqr_n) + wrkbl = max(wrkbl, 3*n+lwork_dgebrd) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) + wrkbl = max(wrkbl, bdspac) + maxwrk = n*n + wrkbl + } else if wantua && wantvn { + // Path 7 (m much larger than n, jobU == All, jobVT == None) + wrkbl = n + lwork_dgeqrf + wrkbl = max(wrkbl, n+lwork_dorgqr_m) + wrkbl = max(wrkbl, 3*n+lwork_dgebrd) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) + wrkbl = max(wrkbl, bdspac) + maxwrk = n*n + wrkbl + } else if wantua && wantvo { + // Path 8 (m much larger than n, jobU == All, jobVT == Overwrite) + wrkbl = n + lwork_dgeqrf + wrkbl = max(wrkbl, n+lwork_dorgqr_m) + wrkbl = max(wrkbl, 3*n+lwork_dgebrd) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) + wrkbl = max(wrkbl, bdspac) + maxwrk = 2*n*n + wrkbl + } else if wantua && wantvas { + // Path 9 (m much larger than n, jobU == All, jobVT == Store or All) + wrkbl = n + lwork_dgeqrf + wrkbl = max(wrkbl, n+lwork_dorgqr_m) + wrkbl = max(wrkbl, 3*n+lwork_dgebrd) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) + wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) + wrkbl = max(wrkbl, bdspac) + maxwrk = 
n*n + wrkbl + } + } else { + // Path 10 (m at least n, but not much larger) + impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1) + lwork_dgebrd := int(work[0]) + maxwrk = 3*n + lwork_dgebrd + if wantus || wantuo { + impl.Dorgbr(lapack.GenerateQ, m, n, n, a, lda, nil, work, -1) + lwork_dorgbr_q = int(work[0]) + maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q) + } + if wantua { + impl.Dorgbr(lapack.GenerateQ, m, m, n, a, lda, nil, work, -1) + lwork_dorgbr_q := int(work[0]) + maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q) + } + if !wantvn { + maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p) + } + maxwrk = max(maxwrk, bdspac) + } + } else { + bdspac = 5 * m + + impl.Dgelqf(m, n, a, lda, nil, work, -1) + lwork_dgelqf := int(work[0]) + + impl.Dorglq(n, n, m, nil, n, nil, work, -1) + lwork_dorglq_n := int(work[0]) + impl.Dorglq(m, n, m, a, lda, nil, work, -1) + lwork_dorglq_m := int(work[0]) + + impl.Dgebrd(m, m, a, lda, s, nil, nil, nil, work, -1) + lwork_dgebrd := int(work[0]) + + impl.Dorgbr(lapack.GeneratePT, m, m, m, a, n, nil, work, -1) + lwork_dorgbr_p := int(work[0]) + + impl.Dorgbr(lapack.GenerateQ, m, m, m, a, n, nil, work, -1) + lwork_dorgbr_q := int(work[0]) + + if n >= mnthr { + if wantvn { + // Path 1t (n much larger than m, jobVT == None) + maxwrk = m + lwork_dgelqf + maxwrk = max(maxwrk, 3*m+lwork_dgebrd) + if wantuo || wantuas { + maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q) + } + maxwrk = max(maxwrk, bdspac) + } else if wantvo && wantun { + // Path 2t (n much larger than m, jobU == None, jobVT == Overwrite) + wrkbl = m + lwork_dgelqf + wrkbl = max(wrkbl, m+lwork_dorglq_m) + wrkbl = max(wrkbl, 3*m+lwork_dgebrd) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) + wrkbl = max(wrkbl, bdspac) + maxwrk = max(m*m+wrkbl, m*m+m*n+m) + } else if wantvo && wantuas { + // Path 3t (n much larger than m, jobU == Store or All, jobVT == Overwrite) + wrkbl = m + lwork_dgelqf + wrkbl = max(wrkbl, m+lwork_dorglq_m) + wrkbl = max(wrkbl, 3*m+lwork_dgebrd) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) + wrkbl = max(wrkbl, bdspac) + maxwrk = max(m*m+wrkbl, m*m+m*n+m) + } else if wantvs && wantun { + // Path 4t (n much larger than m, jobU == None, jobVT == Store) + wrkbl = m + lwork_dgelqf + wrkbl = max(wrkbl, m+lwork_dorglq_m) + wrkbl = max(wrkbl, 3*m+lwork_dgebrd) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) + wrkbl = max(wrkbl, bdspac) + maxwrk = m*m + wrkbl + } else if wantvs && wantuo { + // Path 5t (n much larger than m, jobU == Overwrite, jobVT == Store) + wrkbl = m + lwork_dgelqf + wrkbl = max(wrkbl, m+lwork_dorglq_m) + wrkbl = max(wrkbl, 3*m+lwork_dgebrd) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) + wrkbl = max(wrkbl, bdspac) + maxwrk = 2*m*m + wrkbl + } else if wantvs && wantuas { + // Path 6t (n much larger than m, jobU == Store or All, jobVT == Store) + wrkbl = m + lwork_dgelqf + wrkbl = max(wrkbl, m+lwork_dorglq_m) + wrkbl = max(wrkbl, 3*m+lwork_dgebrd) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) + wrkbl = max(wrkbl, bdspac) + maxwrk = m*m + wrkbl + } else if wantva && wantun { + // Path 7t (n much larger than m, jobU== None, jobVT == All) + wrkbl = m + lwork_dgelqf + wrkbl = max(wrkbl, m+lwork_dorglq_n) + wrkbl = max(wrkbl, 3*m+lwork_dgebrd) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) + wrkbl = max(wrkbl, bdspac) + maxwrk = m*m + wrkbl + } else if wantva && wantuo { + // Path 8t (n much larger than m, jobU == Overwrite, jobVT == All) + wrkbl = m + lwork_dgelqf + wrkbl = max(wrkbl, m+lwork_dorglq_n) + wrkbl = 
max(wrkbl, 3*m+lwork_dgebrd) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) + wrkbl = max(wrkbl, bdspac) + maxwrk = 2*m*m + wrkbl + } else if wantva && wantuas { + // Path 9t (n much larger than m, jobU == Store or All, jobVT == All) + wrkbl = m + lwork_dgelqf + wrkbl = max(wrkbl, m+lwork_dorglq_n) + wrkbl = max(wrkbl, 3*m+lwork_dgebrd) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) + wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) + wrkbl = max(wrkbl, bdspac) + maxwrk = m*m + wrkbl + } + } else { + // Path 10t (n greater than m, but not much larger) + impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1) + lwork_dgebrd = int(work[0]) + maxwrk = 3*m + lwork_dgebrd + if wantvs || wantvo { + impl.Dorgbr(lapack.GeneratePT, m, n, m, a, n, nil, work, -1) + lwork_dorgbr_p = int(work[0]) + maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p) + } + if wantva { + impl.Dorgbr(lapack.GeneratePT, n, n, m, a, n, nil, work, -1) + lwork_dorgbr_p = int(work[0]) + maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p) + } + if !wantun { + maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q) + } + maxwrk = max(maxwrk, bdspac) + } + } + + maxwrk = max(maxwrk, minwork) + if lwork == -1 { + work[0] = float64(maxwrk) + return true + } + + if len(a) < (m-1)*lda+n { + panic(shortA) + } + if len(s) < minmn { + panic(shortS) + } + if (len(u) < (m-1)*ldu+m && wantua) || (len(u) < (m-1)*ldu+minmn && wantus) { + panic(shortU) + } + if (len(vt) < (n-1)*ldvt+n && wantva) || (len(vt) < (minmn-1)*ldvt+n && wantvs) { + panic(shortVT) + } + + // Perform decomposition. + eps := dlamchE + smlnum := math.Sqrt(dlamchS) / eps + bignum := 1 / smlnum + + // Scale A if max element outside range [smlnum, bignum]. + anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil) + var iscl bool + if anrm > 0 && anrm < smlnum { + iscl = true + impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda) + } else if anrm > bignum { + iscl = true + impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda) + } + + bi := blas64.Implementation() + var ie int + if m >= n { + // If A has sufficiently more rows than columns, use the QR decomposition. + if m >= mnthr { + // m >> n + if wantun { + // Path 1. + itau := 0 + iwork := itau + n + + // Compute A = Q * R. + impl.Dgeqrf(m, n, a, lda, work[itau:itau+n], work[iwork:], lwork-iwork) + + // Zero out below R. + impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) + ie = 0 + itauq := ie + n + itaup := itauq + n + iwork = itaup + n + // Bidiagonalize R in A. + impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:], + work[itaup:], work[iwork:], lwork-iwork) + ncvt := 0 + if wantvo || wantvas { + impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, work[itaup:], + work[iwork:], lwork-iwork) + ncvt = n + } + iwork = ie + n + + // Perform bidiagonal QR iteration computing right singular vectors + // of A in A if desired. + ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:], + a, lda, work, 1, work, 1, work[iwork:]) + + // If right singular vectors desired in VT, copy them there. + if wantvas { + impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt) + } + } else if wantuo && wantvn { + // Path 2 + panic(noSVDO) + } else if wantuo && wantvas { + // Path 3 + panic(noSVDO) + } else if wantus { + if wantvn { + // Path 4 + if lwork >= n*n+max(4*n, bdspac) { + // Sufficient workspace for a fast algorithm. + ir := 0 + var ldworkr int + if lwork >= wrkbl+lda*n { + ldworkr = lda + } else { + ldworkr = n + } + itau := ir + ldworkr*n + iwork := itau + n + // Compute A = Q * R. 
+ impl.Dgeqrf(m, n, a, lda, work[itau:itau+n], work[iwork:], lwork-iwork) + + // Copy R to work[ir:], zeroing out below it. + impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr) + impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr) + + // Generate Q in A. + impl.Dorgqr(m, n, n, a, lda, work[itau:itau+n], work[iwork:], lwork-iwork) + ie := itau + itauq := ie + n + itaup := itauq + n + iwork = itaup + n + + // Bidiagonalize R in work[ir:]. + impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + + // Generate left vectors bidiagonalizing R in work[ir:]. + impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr, + work[itauq:], work[iwork:], lwork-iwork) + iwork = ie + n + + // Perform bidiagonal QR iteration, computing left singular + // vectors of R in work[ir:]. + ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1, + work[ir:], ldworkr, work, 1, work[iwork:]) + + // Multiply Q in A by left singular vectors of R in + // work[ir:], storing result in U. + bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda, + work[ir:], ldworkr, 0, u, ldu) + } else { + // Insufficient workspace for a fast algorithm. + itau := 0 + iwork := itau + n + + // Compute A = Q*R, copying result to U. + impl.Dgeqrf(m, n, a, lda, work[itau:itau+n], work[iwork:], lwork-iwork) + impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) + + // Generate Q in U. + impl.Dorgqr(m, n, n, u, ldu, work[itau:itau+n], work[iwork:], lwork-iwork) + ie := itau + itauq := ie + n + itaup := itauq + n + iwork = itaup + n + + // Zero out below R in A. + impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) + + // Bidiagonalize R in A. + impl.Dgebrd(n, n, a, lda, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + + // Multiply Q in U by left vectors bidiagonalizing R. + impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, + a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork) + iwork = ie + n + + // Perform bidiagonal QR iteration, computing left + // singular vectors of A in U. + ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], work, 1, + u, ldu, work, 1, work[iwork:]) + } + } else if wantvo { + // Path 5 + panic(noSVDO) + } else if wantvas { + // Path 6 + if lwork >= n*n+max(4*n, bdspac) { + // Sufficient workspace for a fast algorithm. + iu := 0 + var ldworku int + if lwork >= wrkbl+lda*n { + ldworku = lda + } else { + ldworku = n + } + itau := iu + ldworku*n + iwork := itau + n + + // Compute A = Q * R. + impl.Dgeqrf(m, n, a, lda, work[itau:itau+n], work[iwork:], lwork-iwork) + // Copy R to work[iu:], zeroing out below it. + impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku) + impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku) + + // Generate Q in A. + impl.Dorgqr(m, n, n, a, lda, work[itau:itau+n], work[iwork:], lwork-iwork) + + ie := itau + itauq := ie + n + itaup := itauq + n + iwork = itaup + n + + // Bidiagonalize R in work[iu:], copying result to VT. + impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt) + + // Generate left bidiagonalizing vectors in work[iu:]. + impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku, + work[itauq:], work[iwork:], lwork-iwork) + + // Generate right bidiagonalizing vectors in VT. 
+ impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, + work[itaup:], work[iwork:], lwork-iwork) + iwork = ie + n + + // Perform bidiagonal QR iteration, computing left singular + // vectors of R in work[iu:], and computing right singular + // vectors of R in VT. + ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:], + vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:]) + + // Multiply Q in A by left singular vectors of R in + // work[iu:], storing result in U. + bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda, + work[iu:], ldworku, 0, u, ldu) + } else { + // Insufficient workspace for a fast algorithm. + itau := 0 + iwork := itau + n + + // Compute A = Q * R, copying result to U. + impl.Dgeqrf(m, n, a, lda, work[itau:itau+n], work[iwork:], lwork-iwork) + impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) + + // Generate Q in U. + impl.Dorgqr(m, n, n, u, ldu, work[itau:itau+n], work[iwork:], lwork-iwork) + + // Copy R to VT, zeroing out below it. + impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) + impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt) + + ie := itau + itauq := ie + n + itaup := itauq + n + iwork = itaup + n + + // Bidiagonalize R in VT. + impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + + // Multiply Q in U by left bidiagonalizing vectors in VT. + impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, + vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) + + // Generate right bidiagonalizing vectors in VT. + impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, + work[itaup:], work[iwork:], lwork-iwork) + iwork = ie + n + + // Perform bidiagonal QR iteration, computing left singular + // vectors of A in U and computing right singular vectors + // of A in VT. + ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], + vt, ldvt, u, ldu, work, 1, work[iwork:]) + } + } + } else if wantua { + if wantvn { + // Path 7 + if lwork >= n*n+max(max(n+m, 4*n), bdspac) { + // Sufficient workspace for a fast algorithm. + ir := 0 + var ldworkr int + if lwork >= wrkbl+lda*n { + ldworkr = lda + } else { + ldworkr = n + } + itau := ir + ldworkr*n + iwork := itau + n + + // Compute A = Q*R, copying result to U. + impl.Dgeqrf(m, n, a, lda, work[itau:itau+n], work[iwork:], lwork-iwork) + impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) + + // Copy R to work[ir:], zeroing out below it. + impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr) + impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr) + + // Generate Q in U. + impl.Dorgqr(m, m, n, u, ldu, work[itau:itau+n], work[iwork:], lwork-iwork) + ie := itau + itauq := ie + n + itaup := itauq + n + iwork = itaup + n + + // Bidiagonalize R in work[ir:]. + impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + + // Generate left bidiagonalizing vectors in work[ir:]. + impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr, + work[itauq:], work[iwork:], lwork-iwork) + iwork = ie + n + + // Perform bidiagonal QR iteration, computing left singular + // vectors of R in work[ir:]. + ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1, + work[ir:], ldworkr, work, 1, work[iwork:]) + + // Multiply Q in U by left singular vectors of R in + // work[ir:], storing result in A. + bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu, + work[ir:], ldworkr, 0, a, lda) + + // Copy left singular vectors of A from A to U. 
+ impl.Dlacpy(blas.All, m, n, a, lda, u, ldu) + } else { + // Insufficient workspace for a fast algorithm. + itau := 0 + iwork := itau + n + + // Compute A = Q*R, copying result to U. + impl.Dgeqrf(m, n, a, lda, work[itau:itau+n], work[iwork:], lwork-iwork) + impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) + + // Generate Q in U. + impl.Dorgqr(m, m, n, u, ldu, work[itau:itau+n], work[iwork:], lwork-iwork) + ie := itau + itauq := ie + n + itaup := itauq + n + iwork = itaup + n + + // Zero out below R in A. + impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) + + // Bidiagonalize R in A. + impl.Dgebrd(n, n, a, lda, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + + // Multiply Q in U by left bidiagonalizing vectors in A. + impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, + a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork) + iwork = ie + n + + // Perform bidiagonal QR iteration, computing left + // singular vectors of A in U. + ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], + work, 1, u, ldu, work, 1, work[iwork:]) + } + } else if wantvo { + // Path 8. + panic(noSVDO) + } else if wantvas { + // Path 9. + if lwork >= n*n+max(max(n+m, 4*n), bdspac) { + // Sufficient workspace for a fast algorithm. + iu := 0 + var ldworku int + if lwork >= wrkbl+lda*n { + ldworku = lda + } else { + ldworku = n + } + itau := iu + ldworku*n + iwork := itau + n + + // Compute A = Q * R, copying result to U. + impl.Dgeqrf(m, n, a, lda, work[itau:itau+n], work[iwork:], lwork-iwork) + impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) + + // Generate Q in U. + impl.Dorgqr(m, m, n, u, ldu, work[itau:itau+n], work[iwork:], lwork-iwork) + + // Copy R to work[iu:], zeroing out below it. + impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku) + impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku) + + ie = itau + itauq := ie + n + itaup := itauq + n + iwork = itaup + n + + // Bidiagonalize R in work[iu:], copying result to VT. + impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt) + + // Generate left bidiagonalizing vectors in work[iu:]. + impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku, + work[itauq:], work[iwork:], lwork-iwork) + + // Generate right bidiagonalizing vectors in VT. + impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, + work[itaup:], work[iwork:], lwork-iwork) + iwork = ie + n + + // Perform bidiagonal QR iteration, computing left singular + // vectors of R in work[iu:] and computing right + // singular vectors of R in VT. + ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:], + vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:]) + + // Multiply Q in U by left singular vectors of R in + // work[iu:], storing result in A. + bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, + u, ldu, work[iu:], ldworku, 0, a, lda) + + // Copy left singular vectors of A from A to U. + impl.Dlacpy(blas.All, m, n, a, lda, u, ldu) + + /* + // Bidiagonalize R in VT. + impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + + // Multiply Q in U by left bidiagonalizing vectors in VT. + impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, + m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) + + // Generate right bidiagonalizing vectors in VT. 
+ impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, + work[itaup:], work[iwork:], lwork-iwork) + iwork = ie + n + + // Perform bidiagonal QR iteration, computing left singular + // vectors of A in U and computing right singular vectors + // of A in VT. + ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], + vt, ldvt, u, ldu, work, 1, work[iwork:]) + */ + } else { + // Insufficient workspace for a fast algorithm. + itau := 0 + iwork := itau + n + + // Compute A = Q*R, copying result to U. + impl.Dgeqrf(m, n, a, lda, work[itau:itau+n], work[iwork:], lwork-iwork) + impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) + + // Generate Q in U. + impl.Dorgqr(m, m, n, u, ldu, work[itau:itau+n], work[iwork:], lwork-iwork) + + // Copy R from A to VT, zeroing out below it. + impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) + if n > 1 { + impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt) + } + + ie := itau + itauq := ie + n + itaup := itauq + n + iwork = itaup + n + + // Bidiagonalize R in VT. + impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + + // Multiply Q in U by left bidiagonalizing vectors in VT. + impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, + m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) + + // Generate right bidiagonizing vectors in VT. + impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, + work[itaup:], work[iwork:], lwork-iwork) + iwork = ie + n + + // Perform bidiagonal QR iteration, computing left singular + // vectors of A in U and computing right singular vectors + // of A in VT. + ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], + vt, ldvt, u, ldu, work, 1, work[iwork:]) + } + } + } + } else { + // Path 10. + // M at least N, but not much larger. + ie = 0 + itauq := ie + n + itaup := itauq + n + iwork := itaup + n + + // Bidiagonalize A. + impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], + work[itaup:], work[iwork:], lwork-iwork) + if wantuas { + // Left singular vectors are desired in U. Copy result to U and + // generate left biadiagonalizing vectors in U. + impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) + var ncu int + if wantus { + ncu = n + } + if wantua { + ncu = m + } + impl.Dorgbr(lapack.GenerateQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork) + } + if wantvas { + // Right singular vectors are desired in VT. Copy result to VT and + // generate left biadiagonalizing vectors in VT. + impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) + impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork) + } + if wantuo { + panic(noSVDO) + } + if wantvo { + panic(noSVDO) + } + iwork = ie + n + var nru, ncvt int + if wantuas || wantuo { + nru = m + } + if wantun { + nru = 0 + } + if wantvas || wantvo { + ncvt = n + } + if wantvn { + ncvt = 0 + } + if !wantuo && !wantvo { + // Perform bidiagonal QR iteration, if desired, computing left + // singular vectors in U and right singular vectors in VT. + ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:], + vt, ldvt, u, ldu, work, 1, work[iwork:]) + } else { + // There will be two branches when the implementation is complete. + panic(noSVDO) + } + } + } else { + // A has more columns than rows. If A has sufficiently more columns than + // rows, first reduce using the LQ decomposition. + if n >= mnthr { + // n >> m. + if wantvn { + // Path 1t. + itau := 0 + iwork := itau + m + + // Compute A = L*Q. + impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) + + // Zero out above L. 
+ impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) + ie := 0 + itauq := ie + m + itaup := itauq + m + iwork = itaup + m + + // Bidiagonalize L in A. + impl.Dgebrd(m, m, a, lda, s, work[ie:itauq], + work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork) + if wantuo || wantuas { + impl.Dorgbr(lapack.GenerateQ, m, m, m, a, lda, + work[itauq:], work[iwork:], lwork-iwork) + } + iwork = ie + m + nru := 0 + if wantuo || wantuas { + nru = m + } + + // Perform bidiagonal QR iteration, computing left singular vectors + // of A in A if desired. + ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:], + work, 1, a, lda, work, 1, work[iwork:]) + + // If left singular vectors desired in U, copy them there. + if wantuas { + impl.Dlacpy(blas.All, m, m, a, lda, u, ldu) + } + } else if wantvo && wantun { + // Path 2t. + panic(noSVDO) + } else if wantvo && wantuas { + // Path 3t. + panic(noSVDO) + } else if wantvs { + if wantun { + // Path 4t. + if lwork >= m*m+max(4*m, bdspac) { + // Sufficient workspace for a fast algorithm. + ir := 0 + var ldworkr int + if lwork >= wrkbl+lda*m { + ldworkr = lda + } else { + ldworkr = m + } + itau := ir + ldworkr*m + iwork := itau + m + + // Compute A = L*Q. + impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) + + // Copy L to work[ir:], zeroing out above it. + impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr) + impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr) + + // Generate Q in A. + impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork) + ie := itau + itauq := ie + m + itaup := itauq + m + iwork = itaup + m + + // Bidiagonalize L in work[ir:]. + impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + + // Generate right vectors bidiagonalizing L in work[ir:]. + impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr, + work[itaup:], work[iwork:], lwork-iwork) + iwork = ie + m + + // Perform bidiagonal QR iteration, computing right singular + // vectors of L in work[ir:]. + ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:], + work[ir:], ldworkr, work, 1, work, 1, work[iwork:]) + + // Multiply right singular vectors of L in work[ir:] by + // Q in A, storing result in VT. + bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, + work[ir:], ldworkr, a, lda, 0, vt, ldvt) + } else { + // Insufficient workspace for a fast algorithm. + itau := 0 + iwork := itau + m + + // Compute A = L*Q. + impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) + + // Copy result to VT. + impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) + + // Generate Q in VT. + impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) + ie := itau + itauq := ie + m + itaup := itauq + m + iwork = itaup + m + + // Zero out above L in A. + impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) + + // Bidiagonalize L in A. + impl.Dgebrd(m, m, a, lda, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + + // Multiply right vectors bidiagonalizing L by Q in VT. + impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, + a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) + iwork = ie + m + + // Perform bidiagonal QR iteration, computing right + // singular vectors of A in VT. + ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:], + vt, ldvt, work, 1, work, 1, work[iwork:]) + } + } else if wantuo { + // Path 5t. + panic(noSVDO) + } else if wantuas { + // Path 6t. 
+				if lwork >= m*m+max(4*m, bdspac) {
+					// Sufficient workspace for a fast algorithm.
+					iu := 0
+					var ldworku int
+					if lwork >= wrkbl+lda*m {
+						ldworku = lda
+					} else {
+						ldworku = m
+					}
+					itau := iu + ldworku*m
+					iwork := itau + m
+
+					// Compute A = L*Q.
+					impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+
+					// Copy L to work[iu:], zeroing out above it.
+					impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
+					impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
+
+					// Generate Q in A.
+					impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
+					ie := itau
+					itauq := ie + m
+					itaup := itauq + m
+					iwork = itaup + m
+
+					// Bidiagonalize L in work[iu:], copying result to U.
+					impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
+						work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+					impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
+
+					// Generate right bidiagonalizing vectors in work[iu:].
+					impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku,
+						work[itaup:], work[iwork:], lwork-iwork)
+
+					// Generate left bidiagonalizing vectors in U.
+					impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
+					iwork = ie + m
+
+					// Perform bidiagonal QR iteration, computing left singular
+					// vectors of L in U and computing right singular vectors of
+					// L in work[iu:].
+					ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
+						work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
+
+					// Multiply right singular vectors of L in work[iu:] by
+					// Q in A, storing result in VT.
+					bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
+						work[iu:], ldworku, a, lda, 0, vt, ldvt)
+				} else {
+					// Insufficient workspace for a fast algorithm.
+					itau := 0
+					iwork := itau + m
+
+					// Compute A = L*Q, copying result to VT.
+					impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+					impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
+
+					// Generate Q in VT.
+					impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
+
+					// Copy L to U, zeroing out above it.
+					impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
+					impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
+
+					ie := itau
+					itauq := ie + m
+					itaup := itauq + m
+					iwork = itaup + m
+
+					// Bidiagonalize L in U.
+					impl.Dgebrd(m, m, u, ldu, s, work[ie:],
+						work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+					// Multiply right bidiagonalizing vectors in U by Q in VT.
+					impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
+						u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
+
+					// Generate left bidiagonalizing vectors in U.
+					impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
+					iwork = ie + m
+
+					// Perform bidiagonal QR iteration, computing left singular
+					// vectors of A in U and computing right singular vectors
+					// of A in VT.
+					ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt,
+						u, ldu, work, 1, work[iwork:])
+				}
+			}
+		} else if wantva {
+			if wantun {
+				// Path 7t.
+				if lwork >= m*m+max(max(n+m, 4*m), bdspac) {
+					// Sufficient workspace for a fast algorithm.
+					ir := 0
+					var ldworkr int
+					if lwork >= wrkbl+lda*m {
+						ldworkr = lda
+					} else {
+						ldworkr = m
+					}
+					itau := ir + ldworkr*m
+					iwork := itau + m
+
+					// Compute A = L*Q, copying result to VT.
+					impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+					impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
+
+					// Copy L to work[ir:], zeroing out above it.
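+					// (Dlacpy below takes the lower triangle of A; the following Dlaset clears the strictly upper part of the workspace copy.)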
+ impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr) + impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr) + + // Generate Q in VT. + impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) + + ie := itau + itauq := ie + m + itaup := itauq + m + iwork = itaup + m + + // Bidiagonalize L in work[ir:]. + impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + + // Generate right bidiagonalizing vectors in work[ir:]. + impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr, + work[itaup:], work[iwork:], lwork-iwork) + iwork = ie + m + + // Perform bidiagonal QR iteration, computing right + // singular vectors of L in work[ir:]. + ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:], + work[ir:], ldworkr, work, 1, work, 1, work[iwork:]) + + // Multiply right singular vectors of L in work[ir:] by + // Q in VT, storing result in A. + bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, + work[ir:], ldworkr, vt, ldvt, 0, a, lda) + + // Copy right singular vectors of A from A to VT. + impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt) + } else { + // Insufficient workspace for a fast algorithm. + itau := 0 + iwork := itau + m + // Compute A = L * Q, copying result to VT. + impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) + impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) + + // Generate Q in VT. + impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) + + ie := itau + itauq := ie + m + itaup := itauq + m + iwork = itaup + m + + // Zero out above L in A. + impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) + + // Bidiagonalize L in A. + impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:], + work[itaup:], work[iwork:], lwork-iwork) + + // Multiply right bidiagonalizing vectors in A by Q in VT. + impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, + a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) + iwork = ie + m + + // Perform bidiagonal QR iteration, computing right singular + // vectors of A in VT. + ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:], + vt, ldvt, work, 1, work, 1, work[iwork:]) + } + } else if wantuo { + panic(noSVDO) + } else if wantuas { + // Path 9t. + if lwork >= m*m+max(max(m+n, 4*m), bdspac) { + // Sufficient workspace for a fast algorithm. + iu := 0 + + var ldworku int + if lwork >= wrkbl+lda*m { + ldworku = lda + } else { + ldworku = m + } + itau := iu + ldworku*m + iwork := itau + m + + // Generate A = L * Q copying result to VT. + impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) + impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) + + // Generate Q in VT. + impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) + + // Copy L to work[iu:], zeroing out above it. + impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku) + impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku) + ie = itau + itauq := ie + m + itaup := itauq + m + iwork = itaup + m + + // Bidiagonalize L in work[iu:], copying result to U. + impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:], + work[itauq:], work[itaup:], work[iwork:], lwork-iwork) + impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu) + + // Generate right bidiagonalizing vectors in work[iu:]. + impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku, + work[itaup:], work[iwork:], lwork-iwork) + + // Generate left bidiagonalizing vectors in U. 
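+					// (lapack.GenerateQ forms Q from the left reflectors produced by Dgebrd; the GeneratePT call above formed Pᵀ from the right ones.)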
+					impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
+					iwork = ie + m
+
+					// Perform bidiagonal QR iteration, computing left singular
+					// vectors of L in U and computing right singular vectors
+					// of L in work[iu:].
+					ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
+						work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
+
+					// Multiply right singular vectors of L in work[iu:] by
+					// Q in VT, storing result in A.
+					bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
+						work[iu:], ldworku, vt, ldvt, 0, a, lda)
+
+					// Copy right singular vectors of A from A to VT.
+					impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
+				} else {
+					// Insufficient workspace for a fast algorithm.
+					itau := 0
+					iwork := itau + m
+
+					// Compute A = L * Q, copying result to VT.
+					impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+					impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
+
+					// Generate Q in VT.
+					impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
+
+					// Copy L to U, zeroing out above it.
+					impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
+					impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
+
+					ie = itau
+					itauq := ie + m
+					itaup := itauq + m
+					iwork = itaup + m
+
+					// Bidiagonalize L in U.
+					impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:],
+						work[itaup:], work[iwork:], lwork-iwork)
+
+					// Multiply right bidiagonalizing vectors in U by Q in VT.
+					impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
+						u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
+
+					// Generate left bidiagonalizing vectors in U.
+					impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
+					iwork = ie + m
+
+					// Perform bidiagonal QR iteration, computing left singular
+					// vectors of A in U and computing right singular vectors
+					// of A in VT.
+					ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:],
+						vt, ldvt, u, ldu, work, 1, work[iwork:])
+				}
+			}
+		}
+	} else {
+		// Path 10t.
+		// N at least M, but not much larger.
+		ie = 0
+		itauq := ie + m
+		itaup := itauq + m
+		iwork := itaup + m
+
+		// Bidiagonalize A.
+		impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+		if wantuas {
+			// If left singular vectors desired in U, copy result to U and
+			// generate left bidiagonalizing vectors in U.
+			impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
+			impl.Dorgbr(lapack.GenerateQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
+		}
+		if wantvas {
+			// If right singular vectors desired in VT, copy result to VT
+			// and generate right bidiagonalizing vectors in VT.
+			impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
+			var nrvt int
+			if wantva {
+				nrvt = n
+			} else {
+				nrvt = m
+			}
+			impl.Dorgbr(lapack.GeneratePT, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
+		}
+		if wantuo {
+			panic(noSVDO)
+		}
+		if wantvo {
+			panic(noSVDO)
+		}
+		iwork = ie + m
+		var nru, ncvt int
+		if wantuas || wantuo {
+			nru = m
+		}
+		if wantvas || wantvo {
+			ncvt = n
+		}
+		if !wantuo && !wantvo {
+			// Perform bidiagonal QR iteration, if desired, computing left
+			// singular vectors in U and computing right singular vectors in
+			// VT.
+			ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:],
+				vt, ldvt, u, ldu, work, 1, work[iwork:])
+		} else {
+			// There will be two branches when the implementation is complete.
+ panic(noSVDO) + } + } + } + if !ok { + if ie > 1 { + for i := 0; i < minmn-1; i++ { + work[i+1] = work[i+ie] + } + } + if ie < 1 { + for i := minmn - 2; i >= 0; i-- { + work[i+1] = work[i+ie] + } + } + } + // Undo scaling if necessary. + if iscl { + if anrm > bignum { + impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn, s, minmn) + } + if !ok && anrm > bignum { + impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn-1, work[1:], minmn) + } + if anrm < smlnum { + impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn, s, minmn) + } + if !ok && anrm < smlnum { + impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn-1, work[1:], minmn) + } + } + work[0] = float64(maxwrk) + return ok +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgetc2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetc2.go new file mode 100644 index 0000000000..41203e9fa2 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetc2.go @@ -0,0 +1,125 @@ +// Copyright ©2021 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" +) + +// Dgetc2 computes an LU factorization with complete pivoting of the n×n matrix +// A. The factorization has the form +// +// A = P * L * U * Q, +// +// where P and Q are permutation matrices, L is lower triangular with unit +// diagonal elements and U is upper triangular. +// +// On entry, a contains the matrix A to be factored. On return, a is overwritten +// with the factors L and U. The unit diagonal elements of L are not stored. +// +// On return, ipiv and jpiv contain the pivot indices: row i has been +// interchanged with row ipiv[i] and column j has been interchanged with column +// jpiv[j]. ipiv and jpiv must have length n, otherwise Dgetc2 will panic. +// +// If k is non-negative, then U[k,k] is likely to produce overflow when solving +// for x in A*x=b and U has been perturbed to avoid the overflow. +// +// Dgetc2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dgetc2(n int, a []float64, lda int, ipiv, jpiv []int) (k int) { + switch { + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Negative k indicates U was not perturbed. + k = -1 + + // Quick return if possible. + if n == 0 { + return k + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(ipiv) != n: + panic(badLenIpiv) + case len(jpiv) != n: + panic(badLenJpvt) + } + + const ( + eps = dlamchP + smlnum = dlamchS / eps + ) + + if n == 1 { + ipiv[0], jpiv[0] = 0, 0 + if math.Abs(a[0]) < smlnum { + k = 0 + a[0] = smlnum + } + return k + } + + // Factorize A using complete pivoting. + // Set pivots less than smin to smin. + var smin float64 + var ipv, jpv int + bi := blas64.Implementation() + for i := 0; i < n-1; i++ { + var xmax float64 + for ip := i; ip < n; ip++ { + for jp := i; jp < n; jp++ { + if math.Abs(a[ip*lda+jp]) >= xmax { + xmax = math.Abs(a[ip*lda+jp]) + ipv = ip + jpv = jp + } + } + } + if i == 0 { + smin = math.Max(eps*xmax, smlnum) + } + + // Swap rows. + if ipv != i { + bi.Dswap(n, a[ipv*lda:], 1, a[i*lda:], 1) + } + ipiv[i] = ipv + + // Swap columns. + if jpv != i { + bi.Dswap(n, a[jpv:], lda, a[i:], lda) + } + jpiv[i] = jpv + + // Check for singularity. 
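+		// (A pivot smaller than smin is replaced by smin, so the column scaling below and the later solves with U cannot overflow; k records the perturbed index.)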
+		if math.Abs(a[i*lda+i]) < smin {
+			k = i
+			a[i*lda+i] = smin
+		}
+
+		for j := i + 1; j < n; j++ {
+			a[j*lda+i] /= a[i*lda+i]
+		}
+		bi.Dger(n-i-1, n-i-1, -1, a[(i+1)*lda+i:], lda, a[i*lda+i+1:], 1, a[(i+1)*lda+i+1:], lda)
+	}
+
+	if math.Abs(a[(n-1)*lda+n-1]) < smin {
+		k = n - 1
+		a[(n-1)*lda+(n-1)] = smin
+	}
+
+	// Set last pivots to last index.
+	ipiv[n-1] = n - 1
+	jpiv[n-1] = n - 1
+
+	return k
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgetf2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetf2.go
new file mode 100644
index 0000000000..6a7003cf31
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetf2.go
@@ -0,0 +1,90 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dgetf2 computes the LU decomposition of an m×n matrix A using partial
+// pivoting with row interchanges.
+//
+// The LU decomposition is a factorization of A into
+//
+//	A = P * L * U
+//
+// where P is a permutation matrix, L is a lower triangular with unit diagonal
+// elements (lower trapezoidal if m > n), and U is upper triangular (upper
+// trapezoidal if m < n).
+//
+// On entry, a contains the matrix A. On return, L and U are stored in place
+// into a, and P is represented by ipiv.
+//
+// ipiv contains a sequence of row interchanges. It indicates that row i of the
+// matrix was interchanged with ipiv[i]. ipiv must have length min(m,n), and
+// Dgetf2 will panic otherwise. ipiv is zero-indexed.
+//
+// Dgetf2 returns whether the matrix A is nonsingular. The LU decomposition will
+// be computed regardless of the singularity of A, but the result should not be
+// used to solve a system of equations.
+//
+// Dgetf2 is an internal routine. It is exported for testing purposes.
+func (Implementation) Dgetf2(m, n int, a []float64, lda int, ipiv []int) (ok bool) {
+	mn := min(m, n)
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if mn == 0 {
+		return true
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(ipiv) != mn:
+		panic(badLenIpiv)
+	}
+
+	bi := blas64.Implementation()
+
+	sfmin := dlamchS
+	ok = true
+	for j := 0; j < mn; j++ {
+		// Find a pivot and test for singularity.
+		jp := j + bi.Idamax(m-j, a[j*lda+j:], lda)
+		ipiv[j] = jp
+		if a[jp*lda+j] == 0 {
+			ok = false
+		} else {
+			// Swap the rows if necessary.
+			if jp != j {
+				bi.Dswap(n, a[j*lda:], 1, a[jp*lda:], 1)
+			}
+			if j < m-1 {
+				aj := a[j*lda+j]
+				if math.Abs(aj) >= sfmin {
+					bi.Dscal(m-j-1, 1/aj, a[(j+1)*lda+j:], lda)
+				} else {
+					for i := 0; i < m-j-1; i++ {
+						a[(j+i+1)*lda+j] = a[(j+i+1)*lda+j] / a[lda*j+j]
+					}
+				}
+			}
+		}
+		if j < mn-1 {
+			bi.Dger(m-j-1, n-j-1, -1, a[(j+1)*lda+j:], lda, a[j*lda+j+1:], 1, a[(j+1)*lda+j+1:], lda)
+		}
+	}
+	return ok
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgetrf.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetrf.go
new file mode 100644
index 0000000000..38ae8efa14
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetrf.go
@@ -0,0 +1,89 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dgetrf computes the LU decomposition of an m×n matrix A using partial
+// pivoting with row interchanges.
+//
+// The LU decomposition is a factorization of A into
+//
+//	A = P * L * U
+//
+// where P is a permutation matrix, L is a lower triangular with unit diagonal
+// elements (lower trapezoidal if m > n), and U is upper triangular (upper
+// trapezoidal if m < n).
+//
+// On entry, a contains the matrix A. On return, L and U are stored in place
+// into a, and P is represented by ipiv.
+//
+// ipiv contains a sequence of row interchanges. It indicates that row i of the
+// matrix was interchanged with ipiv[i]. ipiv must have length min(m,n), and
+// Dgetrf will panic otherwise. ipiv is zero-indexed.
+//
+// Dgetrf returns whether the matrix A is nonsingular. The LU decomposition will
+// be computed regardless of the singularity of A, but the result should not be
+// used to solve a system of equations.
+func (impl Implementation) Dgetrf(m, n int, a []float64, lda int, ipiv []int) (ok bool) {
+	mn := min(m, n)
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if mn == 0 {
+		return true
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(ipiv) != mn:
+		panic(badLenIpiv)
+	}
+
+	bi := blas64.Implementation()
+
+	nb := impl.Ilaenv(1, "DGETRF", " ", m, n, -1, -1)
+	if nb <= 1 || mn <= nb {
+		// Use the unblocked algorithm.
+		return impl.Dgetf2(m, n, a, lda, ipiv)
+	}
+	ok = true
+	for j := 0; j < mn; j += nb {
+		jb := min(mn-j, nb)
+		blockOk := impl.Dgetf2(m-j, jb, a[j*lda+j:], lda, ipiv[j:j+jb])
+		if !blockOk {
+			ok = false
+		}
+		for i := j; i <= min(m-1, j+jb-1); i++ {
+			ipiv[i] = j + ipiv[i]
+		}
+		impl.Dlaswp(j, a, lda, j, j+jb-1, ipiv[:j+jb], 1)
+		if j+jb < n {
+			impl.Dlaswp(n-j-jb, a[j+jb:], lda, j, j+jb-1, ipiv[:j+jb], 1)
+			bi.Dtrsm(blas.Left, blas.Lower, blas.NoTrans, blas.Unit,
+				jb, n-j-jb, 1,
+				a[j*lda+j:], lda,
+				a[j*lda+j+jb:], lda)
+			if j+jb < m {
+				bi.Dgemm(blas.NoTrans, blas.NoTrans, m-j-jb, n-j-jb, jb, -1,
+					a[(j+jb)*lda+j:], lda,
+					a[j*lda+j+jb:], lda,
+					1, a[(j+jb)*lda+j+jb:], lda)
+			}
+		}
+	}
+	return ok
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgetri.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetri.go
new file mode 100644
index 0000000000..b2f2ae46b9
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetri.go
@@ -0,0 +1,116 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dgetri computes the inverse of the matrix A using the LU factorization computed
+// by Dgetrf. On entry, a contains the PLU decomposition of A as computed by
+// Dgetrf and on exit contains the inverse of the original matrix.
+//
+// Dgetri will not perform the inversion if the matrix is singular, and returns
+// a boolean indicating whether the inversion was successful.
+//
+// work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= n and this function will panic otherwise.
+// Dgetri is a blocked inversion, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Dgetri,
+// the optimal work length will be stored into work[0].
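+//
+// A minimal call sequence (an illustrative sketch) queries the optimal
+// workspace with lwork == -1 and then inverts in place:
+//
+//	work := make([]float64, 1)
+//	impl.Dgetri(n, a, lda, ipiv, work, -1) // workspace query only
+//	lwork := int(work[0])
+//	work = make([]float64, lwork)
+//	ok := impl.Dgetri(n, a, lda, ipiv, work, lwork) // a holds inv(A) if ok is true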
+func (impl Implementation) Dgetri(n int, a []float64, lda int, ipiv []int, work []float64, lwork int) (ok bool) {
+	iws := max(1, n)
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case lwork < iws && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	if n == 0 {
+		work[0] = 1
+		return true
+	}
+
+	nb := impl.Ilaenv(1, "DGETRI", " ", n, -1, -1, -1)
+	if lwork == -1 {
+		work[0] = float64(n * nb)
+		return true
+	}
+
+	switch {
+	case len(a) < (n-1)*lda+n:
+		panic(shortA)
+	case len(ipiv) != n:
+		panic(badLenIpiv)
+	}
+
+	// Form inv(U).
+	ok = impl.Dtrtri(blas.Upper, blas.NonUnit, n, a, lda)
+	if !ok {
+		return false
+	}
+
+	nbmin := 2
+	if 1 < nb && nb < n {
+		iws = max(n*nb, 1)
+		if lwork < iws {
+			nb = lwork / n
+			nbmin = max(2, impl.Ilaenv(2, "DGETRI", " ", n, -1, -1, -1))
+		}
+	}
+	ldwork := nb
+
+	bi := blas64.Implementation()
+	// Solve the equation inv(A)*L = inv(U) for inv(A).
+	// TODO(btracey): Replace this with a more row-major oriented algorithm.
+	if nb < nbmin || n <= nb {
+		// Unblocked code.
+		for j := n - 1; j >= 0; j-- {
+			for i := j + 1; i < n; i++ {
+				// Copy current column of L to work and replace with zeros.
+				work[i] = a[i*lda+j]
+				a[i*lda+j] = 0
+			}
+			// Compute current column of inv(A).
+			if j < n-1 {
+				bi.Dgemv(blas.NoTrans, n, n-j-1, -1, a[(j+1):], lda, work[(j+1):], 1, 1, a[j:], lda)
+			}
+		}
+	} else {
+		// Blocked code.
+		nn := ((n - 1) / nb) * nb
+		for j := nn; j >= 0; j -= nb {
+			jb := min(nb, n-j)
+			// Copy current block column of L to work and replace
+			// with zeros.
+			for jj := j; jj < j+jb; jj++ {
+				for i := jj + 1; i < n; i++ {
+					work[i*ldwork+(jj-j)] = a[i*lda+jj]
+					a[i*lda+jj] = 0
+				}
+			}
+			// Compute current block column of inv(A).
+			if j+jb < n {
+				bi.Dgemm(blas.NoTrans, blas.NoTrans, n, jb, n-j-jb, -1, a[(j+jb):], lda, work[(j+jb)*ldwork:], ldwork, 1, a[j:], lda)
+			}
+			bi.Dtrsm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, n, jb, 1, work[j*ldwork:], ldwork, a[j:], lda)
+		}
+	}
+	// Apply column interchanges.
+	for j := n - 2; j >= 0; j-- {
+		jp := ipiv[j]
+		if jp != j {
+			bi.Dswap(n, a[j:], lda, a[jp:], lda)
+		}
+	}
+	work[0] = float64(iws)
+	return true
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgetrs.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetrs.go
new file mode 100644
index 0000000000..35b33aa7d7
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetrs.go
@@ -0,0 +1,74 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dgetrs solves a system of equations using an LU factorization.
+// The system of equations solved is
+//
+//	A * X = B   if trans == blas.NoTrans,
+//	Aᵀ * X = B  if trans == blas.Trans or blas.ConjTrans.
+//
+// A is a general n×n matrix with stride lda. B is a general matrix of size n×nrhs.
+//
+// On entry b contains the elements of the matrix B. On exit, b contains the
+// elements of X, the solution to the system of equations.
+//
+// a and ipiv contain the LU factorization of A and the permutation indices as
+// computed by Dgetrf. ipiv is zero-indexed.
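+//
+// A typical pairing (an illustrative sketch) factors once with Dgetrf and
+// then solves; the solution is only meaningful when ok is true:
+//
+//	ipiv := make([]int, n)
+//	ok := impl.Dgetrf(n, n, a, lda, ipiv)
+//	if ok {
+//		impl.Dgetrs(blas.NoTrans, n, nrhs, a, lda, ipiv, b, ldb) // b now holds X
+//	}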
+func (impl Implementation) Dgetrs(trans blas.Transpose, n, nrhs int, a []float64, lda int, ipiv []int, b []float64, ldb int) { + switch { + case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: + panic(badTrans) + case n < 0: + panic(nLT0) + case nrhs < 0: + panic(nrhsLT0) + case lda < max(1, n): + panic(badLdA) + case ldb < max(1, nrhs): + panic(badLdB) + } + + // Quick return if possible. + if n == 0 || nrhs == 0 { + return + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(b) < (n-1)*ldb+nrhs: + panic(shortB) + case len(ipiv) != n: + panic(badLenIpiv) + } + + bi := blas64.Implementation() + + if trans == blas.NoTrans { + // Solve A * X = B. + impl.Dlaswp(nrhs, b, ldb, 0, n-1, ipiv, 1) + // Solve L * X = B, updating b. + bi.Dtrsm(blas.Left, blas.Lower, blas.NoTrans, blas.Unit, + n, nrhs, 1, a, lda, b, ldb) + // Solve U * X = B, updating b. + bi.Dtrsm(blas.Left, blas.Upper, blas.NoTrans, blas.NonUnit, + n, nrhs, 1, a, lda, b, ldb) + return + } + // Solve Aᵀ * X = B. + // Solve Uᵀ * X = B, updating b. + bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, + n, nrhs, 1, a, lda, b, ldb) + // Solve Lᵀ * X = B, updating b. + bi.Dtrsm(blas.Left, blas.Lower, blas.Trans, blas.Unit, + n, nrhs, 1, a, lda, b, ldb) + impl.Dlaswp(nrhs, b, ldb, 0, n-1, ipiv, -1) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgghrd.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgghrd.go new file mode 100644 index 0000000000..c9d6b4d1b3 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgghrd.go @@ -0,0 +1,125 @@ +// Copyright ©2023 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dgghrd reduces a pair of real matrices (A,B) to generalized upper Hessenberg +// form using orthogonal transformations, where A is a general matrix and B is +// upper triangular. +// +// This subroutine simultaneously reduces A to a Hessenberg matrix H +// +// Qᵀ*A*Z = H, +// +// and transforms B to another upper triangular matrix T +// +// Qᵀ*B*Z = T. +// +// The orthogonal matrices Q and Z are determined as products of Givens +// rotations. They may either be formed explicitly (lapack.OrthoExplicit), or +// they may be postmultiplied into input matrices Q1 and Z1 +// (lapack.OrthoPostmul), so that +// +// Q1 * A * Z1ᵀ = (Q1*Q) * H * (Z1*Z)ᵀ, +// Q1 * B * Z1ᵀ = (Q1*Q) * T * (Z1*Z)ᵀ. +// +// ilo and ihi determine the block of A that will be reduced. It must hold that +// +// - 0 <= ilo <= ihi < n if n > 0, +// - ilo == 0 and ihi == -1 if n == 0, +// +// otherwise Dgghrd will panic. +// +// Dgghrd is an internal routine. It is exported for testing purposes. 
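+//
+// For example (an illustrative sketch), reducing the whole pencil with
+// explicitly formed Q and Z:
+//
+//	impl.Dgghrd(lapack.OrthoExplicit, lapack.OrthoExplicit, n, 0, n-1,
+//		a, lda, b, ldb, q, ldq, z, ldz)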
+func (impl Implementation) Dgghrd(compq, compz lapack.OrthoComp, n, ilo, ihi int, a []float64, lda int, b []float64, ldb int, q []float64, ldq int, z []float64, ldz int) { + switch { + case compq != lapack.OrthoNone && compq != lapack.OrthoExplicit && compq != lapack.OrthoPostmul: + panic(badOrthoComp) + case compz != lapack.OrthoNone && compz != lapack.OrthoExplicit && compz != lapack.OrthoPostmul: + panic(badOrthoComp) + case n < 0: + panic(nLT0) + case ilo < 0 || max(0, n-1) < ilo: + panic(badIlo) + case ihi < min(ilo, n-1) || n <= ihi: + panic(badIhi) + case lda < max(1, n): + panic(badLdA) + case ldb < max(1, n): + panic(badLdB) + case (compq != lapack.OrthoNone && ldq < n) || ldq < 1: + panic(badLdQ) + case (compz != lapack.OrthoNone && ldz < n) || ldz < 1: + panic(badLdZ) + } + + // Quick return if possible. + if n == 0 { + return + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(b) < (n-1)*ldb+n: + panic(shortB) + case compq != lapack.OrthoNone && len(q) < (n-1)*ldq+n: + panic(shortQ) + case compz != lapack.OrthoNone && len(z) < (n-1)*ldz+n: + panic(shortZ) + } + + if compq == lapack.OrthoExplicit { + impl.Dlaset(blas.All, n, n, 0, 1, q, ldq) + } + if compz == lapack.OrthoExplicit { + impl.Dlaset(blas.All, n, n, 0, 1, z, ldz) + } + + // Quick return if possible. + if n == 1 { + return + } + + // Zero out lower triangle of B. + for i := 1; i < n; i++ { + for j := 0; j < i; j++ { + b[i*ldb+j] = 0 + } + } + bi := blas64.Implementation() + // Reduce A and B. + for jcol := ilo; jcol <= ihi-2; jcol++ { + for jrow := ihi; jrow >= jcol+2; jrow-- { + // Step 1: rotate rows jrow-1, jrow to kill A[jrow,jcol]. + var c, s float64 + c, s, a[(jrow-1)*lda+jcol] = impl.Dlartg(a[(jrow-1)*lda+jcol], a[jrow*lda+jcol]) + a[jrow*lda+jcol] = 0 + + bi.Drot(n-jcol-1, a[(jrow-1)*lda+jcol+1:], 1, a[jrow*lda+jcol+1:], 1, c, s) + bi.Drot(n+2-jrow-1, b[(jrow-1)*ldb+jrow-1:], 1, b[jrow*ldb+jrow-1:], 1, c, s) + + if compq != lapack.OrthoNone { + bi.Drot(n, q[jrow-1:], ldq, q[jrow:], ldq, c, s) + } + + // Step 2: rotate columns jrow, jrow-1 to kill B[jrow,jrow-1]. + c, s, b[jrow*ldb+jrow] = impl.Dlartg(b[jrow*ldb+jrow], b[jrow*ldb+jrow-1]) + b[jrow*ldb+jrow-1] = 0 + + bi.Drot(ihi+1, a[jrow:], lda, a[jrow-1:], lda, c, s) + bi.Drot(jrow, b[jrow:], ldb, b[jrow-1:], ldb, c, s) + + if compz != lapack.OrthoNone { + bi.Drot(n, z[jrow:], ldz, z[jrow-1:], ldz, c, s) + } + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dggsvd3.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dggsvd3.go new file mode 100644 index 0000000000..cfe10efa9d --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dggsvd3.go @@ -0,0 +1,258 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dggsvd3 computes the generalized singular value decomposition (GSVD) +// of an m×n matrix A and p×n matrix B: +// +// Uᵀ*A*Q = D1*[ 0 R ] +// +// Vᵀ*B*Q = D2*[ 0 R ] +// +// where U, V and Q are orthogonal matrices. +// +// Dggsvd3 returns k and l, the dimensions of the sub-blocks. k+l +// is the effective numerical rank of the (m+p)×n matrix [ Aᵀ Bᵀ ]ᵀ. 
+// R is a (k+l)×(k+l) nonsingular upper triangular matrix, D1 and +// D2 are m×(k+l) and p×(k+l) diagonal matrices and of the following +// structures, respectively: +// +// If m-k-l >= 0, +// +// k l +// D1 = k [ I 0 ] +// l [ 0 C ] +// m-k-l [ 0 0 ] +// +// k l +// D2 = l [ 0 S ] +// p-l [ 0 0 ] +// +// n-k-l k l +// [ 0 R ] = k [ 0 R11 R12 ] k +// l [ 0 0 R22 ] l +// +// where +// +// C = diag( alpha_k, ... , alpha_{k+l} ), +// S = diag( beta_k, ... , beta_{k+l} ), +// C^2 + S^2 = I. +// +// R is stored in +// +// A[0:k+l, n-k-l:n] +// +// on exit. +// +// If m-k-l < 0, +// +// k m-k k+l-m +// D1 = k [ I 0 0 ] +// m-k [ 0 C 0 ] +// +// k m-k k+l-m +// D2 = m-k [ 0 S 0 ] +// k+l-m [ 0 0 I ] +// p-l [ 0 0 0 ] +// +// n-k-l k m-k k+l-m +// [ 0 R ] = k [ 0 R11 R12 R13 ] +// m-k [ 0 0 R22 R23 ] +// k+l-m [ 0 0 0 R33 ] +// +// where +// +// C = diag( alpha_k, ... , alpha_m ), +// S = diag( beta_k, ... , beta_m ), +// C^2 + S^2 = I. +// +// R = [ R11 R12 R13 ] is stored in A[1:m, n-k-l+1:n] +// [ 0 R22 R23 ] +// +// and R33 is stored in +// +// B[m-k:l, n+m-k-l:n] on exit. +// +// Dggsvd3 computes C, S, R, and optionally the orthogonal transformation +// matrices U, V and Q. +// +// jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior +// is as follows +// +// jobU == lapack.GSVDU Compute orthogonal matrix U +// jobU == lapack.GSVDNone Do not compute orthogonal matrix. +// +// The behavior is the same for jobV and jobQ with the exception that instead of +// lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively. +// The matrices U, V and Q must be m×m, p×p and n×n respectively unless the +// relevant job parameter is lapack.GSVDNone. +// +// alpha and beta must have length n or Dggsvd3 will panic. On exit, alpha and +// beta contain the generalized singular value pairs of A and B +// +// alpha[0:k] = 1, +// beta[0:k] = 0, +// +// if m-k-l >= 0, +// +// alpha[k:k+l] = diag(C), +// beta[k:k+l] = diag(S), +// +// if m-k-l < 0, +// +// alpha[k:m]= C, alpha[m:k+l]= 0 +// beta[k:m] = S, beta[m:k+l] = 1. +// +// if k+l < n, +// +// alpha[k+l:n] = 0 and +// beta[k+l:n] = 0. +// +// On exit, iwork contains the permutation required to sort alpha descending. +// +// iwork must have length n, work must have length at least max(1, lwork), and +// lwork must be -1 or greater than n, otherwise Dggsvd3 will panic. If +// lwork is -1, work[0] holds the optimal lwork on return, but Dggsvd3 does +// not perform the GSVD. 
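+//
+// The workspace query follows the usual convention (an illustrative sketch):
+//
+//	work := make([]float64, 1)
+//	impl.Dggsvd3(jobU, jobV, jobQ, m, n, p, a, lda, b, ldb, alpha, beta,
+//		u, ldu, v, ldv, q, ldq, work, -1, iwork)
+//	work = make([]float64, int(work[0]))
+//	k, l, ok := impl.Dggsvd3(jobU, jobV, jobQ, m, n, p, a, lda, b, ldb,
+//		alpha, beta, u, ldu, v, ldv, q, ldq, work, len(work), iwork)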
+func (impl Implementation) Dggsvd3(jobU, jobV, jobQ lapack.GSVDJob, m, n, p int, a []float64, lda int, b []float64, ldb int, alpha, beta, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, work []float64, lwork int, iwork []int) (k, l int, ok bool) { + wantu := jobU == lapack.GSVDU + wantv := jobV == lapack.GSVDV + wantq := jobQ == lapack.GSVDQ + switch { + case !wantu && jobU != lapack.GSVDNone: + panic(badGSVDJob + "U") + case !wantv && jobV != lapack.GSVDNone: + panic(badGSVDJob + "V") + case !wantq && jobQ != lapack.GSVDNone: + panic(badGSVDJob + "Q") + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case p < 0: + panic(pLT0) + case lda < max(1, n): + panic(badLdA) + case ldb < max(1, n): + panic(badLdB) + case ldu < 1, wantu && ldu < m: + panic(badLdU) + case ldv < 1, wantv && ldv < p: + panic(badLdV) + case ldq < 1, wantq && ldq < n: + panic(badLdQ) + case len(iwork) < n: + panic(shortWork) + case lwork < 1 && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Determine optimal work length. + impl.Dggsvp3(jobU, jobV, jobQ, + m, p, n, + a, lda, + b, ldb, + 0, 0, + u, ldu, + v, ldv, + q, ldq, + iwork, + work, work, -1) + lwkopt := n + int(work[0]) + lwkopt = max(lwkopt, 2*n) + lwkopt = max(lwkopt, 1) + work[0] = float64(lwkopt) + if lwork == -1 { + return 0, 0, true + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(b) < (p-1)*ldb+n: + panic(shortB) + case wantu && len(u) < (m-1)*ldu+m: + panic(shortU) + case wantv && len(v) < (p-1)*ldv+p: + panic(shortV) + case wantq && len(q) < (n-1)*ldq+n: + panic(shortQ) + case len(alpha) != n: + panic(badLenAlpha) + case len(beta) != n: + panic(badLenBeta) + } + + // Compute the Frobenius norm of matrices A and B. + anorm := impl.Dlange(lapack.Frobenius, m, n, a, lda, nil) + bnorm := impl.Dlange(lapack.Frobenius, p, n, b, ldb, nil) + + // Get machine precision and set up threshold for determining + // the effective numerical rank of the matrices A and B. + tola := float64(max(m, n)) * math.Max(anorm, dlamchS) * dlamchP + tolb := float64(max(p, n)) * math.Max(bnorm, dlamchS) * dlamchP + + // Preprocessing. + k, l = impl.Dggsvp3(jobU, jobV, jobQ, + m, p, n, + a, lda, + b, ldb, + tola, tolb, + u, ldu, + v, ldv, + q, ldq, + iwork, + work[:n], work[n:], lwork-n) + + // Compute the GSVD of two upper "triangular" matrices. + _, ok = impl.Dtgsja(jobU, jobV, jobQ, + m, p, n, + k, l, + a, lda, + b, ldb, + tola, tolb, + alpha, beta, + u, ldu, + v, ldv, + q, ldq, + work) + + // Sort the singular values and store the pivot indices in iwork + // Copy alpha to work, then sort alpha in work. + bi := blas64.Implementation() + bi.Dcopy(n, alpha, 1, work[:n], 1) + ibnd := min(l, m-k) + for i := 0; i < ibnd; i++ { + // Scan for largest alpha_{k+i}. + isub := i + smax := work[k+i] + for j := i + 1; j < ibnd; j++ { + if v := work[k+j]; v > smax { + isub = j + smax = v + } + } + if isub != i { + work[k+isub] = work[k+i] + work[k+i] = smax + iwork[k+i] = k + isub + } else { + iwork[k+i] = k + i + } + } + + work[0] = float64(lwkopt) + + return k, l, ok +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dggsvp3.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dggsvp3.go new file mode 100644 index 0000000000..f7f04c764f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dggsvp3.go @@ -0,0 +1,286 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dggsvp3 computes orthogonal matrices U, V and Q such that +// +// n-k-l k l +// Uᵀ*A*Q = k [ 0 A12 A13 ] if m-k-l >= 0; +// l [ 0 0 A23 ] +// m-k-l [ 0 0 0 ] +// +// n-k-l k l +// Uᵀ*A*Q = k [ 0 A12 A13 ] if m-k-l < 0; +// m-k [ 0 0 A23 ] +// +// n-k-l k l +// Vᵀ*B*Q = l [ 0 0 B13 ] +// p-l [ 0 0 0 ] +// +// where the k×k matrix A12 and l×l matrix B13 are non-singular +// upper triangular. A23 is l×l upper triangular if m-k-l >= 0, +// otherwise A23 is (m-k)×l upper trapezoidal. +// +// Dggsvp3 returns k and l, the dimensions of the sub-blocks. k+l +// is the effective numerical rank of the (m+p)×n matrix [ Aᵀ Bᵀ ]ᵀ. +// +// jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior +// is as follows +// +// jobU == lapack.GSVDU Compute orthogonal matrix U +// jobU == lapack.GSVDNone Do not compute orthogonal matrix. +// +// The behavior is the same for jobV and jobQ with the exception that instead of +// lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively. +// The matrices U, V and Q must be m×m, p×p and n×n respectively unless the +// relevant job parameter is lapack.GSVDNone. +// +// tola and tolb are the convergence criteria for the Jacobi-Kogbetliantz +// iteration procedure. Generally, they are the same as used in the preprocessing +// step, for example, +// +// tola = max(m, n)*norm(A)*eps, +// tolb = max(p, n)*norm(B)*eps. +// +// Where eps is the machine epsilon. +// +// iwork must have length n, work must have length at least max(1, lwork), and +// lwork must be -1 or greater than zero, otherwise Dggsvp3 will panic. +// +// Dggsvp3 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dggsvp3(jobU, jobV, jobQ lapack.GSVDJob, m, p, n int, a []float64, lda int, b []float64, ldb int, tola, tolb float64, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, iwork []int, tau, work []float64, lwork int) (k, l int) { + wantu := jobU == lapack.GSVDU + wantv := jobV == lapack.GSVDV + wantq := jobQ == lapack.GSVDQ + switch { + case !wantu && jobU != lapack.GSVDNone: + panic(badGSVDJob + "U") + case !wantv && jobV != lapack.GSVDNone: + panic(badGSVDJob + "V") + case !wantq && jobQ != lapack.GSVDNone: + panic(badGSVDJob + "Q") + case m < 0: + panic(mLT0) + case p < 0: + panic(pLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case ldb < max(1, n): + panic(badLdB) + case ldu < 1, wantu && ldu < m: + panic(badLdU) + case ldv < 1, wantv && ldv < p: + panic(badLdV) + case ldq < 1, wantq && ldq < n: + panic(badLdQ) + case len(iwork) != n: + panic(shortWork) + case lwork < 1 && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + var lwkopt int + impl.Dgeqp3(p, n, b, ldb, iwork, tau, work, -1) + lwkopt = int(work[0]) + if wantv { + lwkopt = max(lwkopt, p) + } + lwkopt = max(lwkopt, min(n, p)) + lwkopt = max(lwkopt, m) + if wantq { + lwkopt = max(lwkopt, n) + } + impl.Dgeqp3(m, n, a, lda, iwork, tau, work, -1) + lwkopt = max(lwkopt, int(work[0])) + lwkopt = max(1, lwkopt) + if lwork == -1 { + work[0] = float64(lwkopt) + return 0, 0 + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(b) < (p-1)*ldb+n: + panic(shortB) + case wantu && len(u) < (m-1)*ldu+m: + panic(shortU) + case wantv && len(v) < (p-1)*ldv+p: + panic(shortV) + case wantq && len(q) < (n-1)*ldq+n: + panic(shortQ) + case len(tau) < n: + // tau check must come 
after lwkopt query since
+		// the Dggsvd3 call for lwkopt query may have
+		// lwork == -1, and tau is provided by work.
+		panic(shortTau)
+	}
+
+	const forward = true
+
+	// QR with column pivoting of B: B*P = V*[ S11 S12 ].
+	//                                       [  0   0  ]
+	for i := range iwork[:n] {
+		iwork[i] = 0
+	}
+	impl.Dgeqp3(p, n, b, ldb, iwork, tau, work, lwork)
+
+	// Update A := A*P.
+	impl.Dlapmt(forward, m, n, a, lda, iwork)
+
+	// Determine the effective rank of matrix B.
+	for i := 0; i < min(p, n); i++ {
+		if math.Abs(b[i*ldb+i]) > tolb {
+			l++
+		}
+	}
+
+	if wantv {
+		// Copy the details of V, and form V.
+		impl.Dlaset(blas.All, p, p, 0, 0, v, ldv)
+		if p > 1 {
+			impl.Dlacpy(blas.Lower, p-1, min(p, n), b[ldb:], ldb, v[ldv:], ldv)
+		}
+		impl.Dorg2r(p, p, min(p, n), v, ldv, tau[:min(p, n)], work)
+	}
+
+	// Clean up B.
+	for i := 1; i < l; i++ {
+		r := b[i*ldb : i*ldb+i]
+		for j := range r {
+			r[j] = 0
+		}
+	}
+	if p > l {
+		impl.Dlaset(blas.All, p-l, n, 0, 0, b[l*ldb:], ldb)
+	}
+
+	if wantq {
+		// Set Q = I and update Q := Q*P.
+		impl.Dlaset(blas.All, n, n, 0, 1, q, ldq)
+		impl.Dlapmt(forward, n, n, q, ldq, iwork)
+	}
+
+	if p >= l && n != l {
+		// RQ factorization of [ S11 S12 ]: [ S11 S12 ] = [ 0 S12 ]*Z.
+		impl.Dgerq2(l, n, b, ldb, tau, work)
+
+		// Update A := A*Zᵀ.
+		impl.Dormr2(blas.Right, blas.Trans, m, n, l, b, ldb, tau, a, lda, work)
+
+		if wantq {
+			// Update Q := Q*Zᵀ.
+			impl.Dormr2(blas.Right, blas.Trans, n, n, l, b, ldb, tau, q, ldq, work)
+		}
+
+		// Clean up B.
+		impl.Dlaset(blas.All, l, n-l, 0, 0, b, ldb)
+		for i := 1; i < l; i++ {
+			r := b[i*ldb+n-l : i*ldb+i+n-l]
+			for j := range r {
+				r[j] = 0
+			}
+		}
+	}
+
+	// Let              N-L     L
+	//            A = [ A11    A12 ] M,
+	//
+	// then the following does the complete QR decomposition of A11:
+	//
+	//	A11 = U*[ 0 T12 ]*P1ᵀ.
+	//	        [ 0  0  ]
+	for i := range iwork[:n-l] {
+		iwork[i] = 0
+	}
+	impl.Dgeqp3(m, n-l, a, lda, iwork[:n-l], tau, work, lwork)
+
+	// Determine the effective rank of A11.
+	for i := 0; i < min(m, n-l); i++ {
+		if math.Abs(a[i*lda+i]) > tola {
+			k++
+		}
+	}
+
+	// Update A12 := Uᵀ*A12, where A12 = A[0:m, n-l:n].
+	impl.Dorm2r(blas.Left, blas.Trans, m, l, min(m, n-l), a, lda, tau[:min(m, n-l)], a[n-l:], lda, work)
+
+	if wantu {
+		// Copy the details of U, and form U.
+		impl.Dlaset(blas.All, m, m, 0, 0, u, ldu)
+		if m > 1 {
+			impl.Dlacpy(blas.Lower, m-1, min(m, n-l), a[lda:], lda, u[ldu:], ldu)
+		}
+		k := min(m, n-l)
+		impl.Dorg2r(m, m, k, u, ldu, tau[:k], work)
+	}
+
+	if wantq {
+		// Update Q[0:n, 0:n-l] := Q[0:n, 0:n-l]*P1.
+		impl.Dlapmt(forward, n, n-l, q, ldq, iwork[:n-l])
+	}
+
+	// Clean up A: set the strictly lower triangular part of
+	// A[0:k, 0:k] = 0, and A[k:m, 0:n-l] = 0.
+	for i := 1; i < k; i++ {
+		r := a[i*lda : i*lda+i]
+		for j := range r {
+			r[j] = 0
+		}
+	}
+	if m > k {
+		impl.Dlaset(blas.All, m-k, n-l, 0, 0, a[k*lda:], lda)
+	}
+
+	if n-l > k {
+		// RQ factorization of [ T11 T12 ] = [ 0 T12 ]*Z1.
+		impl.Dgerq2(k, n-l, a, lda, tau, work)
+
+		if wantq {
+			// Update Q[0:n, 0:n-l] := Q[0:n, 0:n-l]*Z1ᵀ.
+			impl.Dorm2r(blas.Right, blas.Trans, n, n-l, k, a, lda, tau[:k], q, ldq, work)
+		}
+
+		// Clean up A.
+		impl.Dlaset(blas.All, k, n-l-k, 0, 0, a, lda)
+		for i := 1; i < k; i++ {
+			r := a[i*lda+n-k-l : i*lda+i+n-k-l]
+			for j := range r {
+				r[j] = 0
+			}
+		}
+	}
+
+	if m > k {
+		// QR factorization of A[k:m, n-l:n].
+		impl.Dgeqr2(m-k, l, a[k*lda+n-l:], lda, tau[:min(m-k, l)], work)
+		if wantu {
+			// Update U[:, k:m] := U[:, k:m]*U1.
+ impl.Dorm2r(blas.Right, blas.NoTrans, m, m-k, min(m-k, l), a[k*lda+n-l:], lda, tau[:min(m-k, l)], u[k:], ldu, work) + } + + // Clean up A. + for i := k + 1; i < m; i++ { + r := a[i*lda+n-l : i*lda+min(n-l+i-k, n)] + for j := range r { + r[j] = 0 + } + } + } + + work[0] = float64(lwkopt) + return k, l +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dgtsv.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dgtsv.go new file mode 100644 index 0000000000..944af1a607 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgtsv.go @@ -0,0 +1,101 @@ +// Copyright ©2020 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dgtsv solves the equation +// +// A * X = B +// +// where A is an n×n tridiagonal matrix. It uses Gaussian elimination with +// partial pivoting. The equation Aᵀ * X = B may be solved by swapping the +// arguments for du and dl. +// +// On entry, dl, d and du contain the sub-diagonal, the diagonal and the +// super-diagonal, respectively, of A. On return, the first n-2 elements of dl, +// the first n-1 elements of du and the first n elements of d may be +// overwritten. +// +// On entry, b contains the n×nrhs right-hand side matrix B. On return, b will +// be overwritten. If ok is true, it will be overwritten by the solution matrix X. +// +// Dgtsv returns whether the solution X has been successfully computed. +func (impl Implementation) Dgtsv(n, nrhs int, dl, d, du []float64, b []float64, ldb int) (ok bool) { + switch { + case n < 0: + panic(nLT0) + case nrhs < 0: + panic(nrhsLT0) + case ldb < max(1, nrhs): + panic(badLdB) + } + + if n == 0 || nrhs == 0 { + return true + } + + switch { + case len(dl) < n-1: + panic(shortDL) + case len(d) < n: + panic(shortD) + case len(du) < n-1: + panic(shortDU) + case len(b) < (n-1)*ldb+nrhs: + panic(shortB) + } + + dl = dl[:n-1] + d = d[:n] + du = du[:n-1] + + for i := 0; i < n-1; i++ { + if math.Abs(d[i]) >= math.Abs(dl[i]) { + // No row interchange required. + if d[i] == 0 { + return false + } + fact := dl[i] / d[i] + d[i+1] -= fact * du[i] + for j := 0; j < nrhs; j++ { + b[(i+1)*ldb+j] -= fact * b[i*ldb+j] + } + dl[i] = 0 + } else { + // Interchange rows i and i+1. + fact := d[i] / dl[i] + d[i] = dl[i] + tmp := d[i+1] + d[i+1] = du[i] - fact*tmp + du[i] = tmp + if i+1 < n-1 { + dl[i] = du[i+1] + du[i+1] = -fact * dl[i] + } + for j := 0; j < nrhs; j++ { + tmp = b[i*ldb+j] + b[i*ldb+j] = b[(i+1)*ldb+j] + b[(i+1)*ldb+j] = tmp - fact*b[(i+1)*ldb+j] + } + } + } + if d[n-1] == 0 { + return false + } + + // Back solve with the matrix U from the factorization. + for j := 0; j < nrhs; j++ { + b[(n-1)*ldb+j] /= d[n-1] + if n > 1 { + b[(n-2)*ldb+j] = (b[(n-2)*ldb+j] - du[n-2]*b[(n-1)*ldb+j]) / d[n-2] + } + for i := n - 3; i >= 0; i-- { + b[i*ldb+j] = (b[i*ldb+j] - du[i]*b[(i+1)*ldb+j] - dl[i]*b[(i+2)*ldb+j]) / d[i] + } + } + + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dhseqr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dhseqr.go new file mode 100644 index 0000000000..beccf132b7 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dhseqr.go @@ -0,0 +1,272 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dhseqr computes the eigenvalues of an n×n Hessenberg matrix H and, +// optionally, the matrices T and Z from the Schur decomposition +// +// H = Z T Zᵀ, +// +// where T is an n×n upper quasi-triangular matrix (the Schur form), and Z is +// the n×n orthogonal matrix of Schur vectors. +// +// Optionally Z may be postmultiplied into an input orthogonal matrix Q so that +// this routine can give the Schur factorization of a matrix A which has been +// reduced to the Hessenberg form H by the orthogonal matrix Q: +// +// A = Q H Qᵀ = (QZ) T (QZ)ᵀ. +// +// If job == lapack.EigenvaluesOnly, only the eigenvalues will be computed. +// If job == lapack.EigenvaluesAndSchur, the eigenvalues and the Schur form T will +// be computed. +// For other values of job Dhseqr will panic. +// +// If compz == lapack.SchurNone, no Schur vectors will be computed and Z will not be +// referenced. +// If compz == lapack.SchurHess, on return Z will contain the matrix of Schur +// vectors of H. +// If compz == lapack.SchurOrig, on entry z is assumed to contain the orthogonal +// matrix Q that is the identity except for the submatrix +// Q[ilo:ihi+1,ilo:ihi+1]. On return z will be updated to the product Q*Z. +// +// ilo and ihi determine the block of H on which Dhseqr operates. It is assumed +// that H is already upper triangular in rows and columns [0:ilo] and [ihi+1:n], +// although it will be only checked that the block is isolated, that is, +// +// ilo == 0 or H[ilo,ilo-1] == 0, +// ihi == n-1 or H[ihi+1,ihi] == 0, +// +// and Dhseqr will panic otherwise. ilo and ihi are typically set by a previous +// call to Dgebal, otherwise they should be set to 0 and n-1, respectively. It +// must hold that +// +// 0 <= ilo <= ihi < n if n > 0, +// ilo == 0 and ihi == -1 if n == 0. +// +// wr and wi must have length n. +// +// work must have length at least lwork and lwork must be at least max(1,n) +// otherwise Dhseqr will panic. The minimum lwork delivers very good and +// sometimes optimal performance, although lwork as large as 11*n may be +// required. On return, work[0] will contain the optimal value of lwork. +// +// If lwork is -1, instead of performing Dhseqr, the function only estimates the +// optimal workspace size and stores it into work[0]. Neither h nor z are +// accessed. +// +// unconverged indicates whether Dhseqr computed all the eigenvalues. +// +// If unconverged == 0, all the eigenvalues have been computed and their real +// and imaginary parts will be stored on return in wr and wi, respectively. If +// two eigenvalues are computed as a complex conjugate pair, they are stored in +// consecutive elements of wr and wi, say the i-th and (i+1)th, with wi[i] > 0 +// and wi[i+1] < 0. +// +// If unconverged == 0 and job == lapack.EigenvaluesAndSchur, on return H will +// contain the upper quasi-triangular matrix T from the Schur decomposition (the +// Schur form). 2×2 diagonal blocks (corresponding to complex conjugate pairs of +// eigenvalues) will be returned in standard form, with +// +// H[i,i] == H[i+1,i+1], +// +// and +// +// H[i+1,i]*H[i,i+1] < 0. +// +// The eigenvalues will be stored in wr and wi in the same order as on the +// diagonal of the Schur form returned in H, with +// +// wr[i] = H[i,i], +// +// and, if H[i:i+2,i:i+2] is a 2×2 diagonal block, +// +// wi[i] = sqrt(-H[i+1,i]*H[i,i+1]), +// wi[i+1] = -wi[i]. 
+// +// If unconverged == 0 and job == lapack.EigenvaluesOnly, the contents of h +// on return is unspecified. +// +// If unconverged > 0, some eigenvalues have not converged, and the blocks +// [0:ilo] and [unconverged:n] of wr and wi will contain those eigenvalues which +// have been successfully computed. Failures are rare. +// +// If unconverged > 0 and job == lapack.EigenvaluesOnly, on return the +// remaining unconverged eigenvalues are the eigenvalues of the upper Hessenberg +// matrix H[ilo:unconverged,ilo:unconverged]. +// +// If unconverged > 0 and job == lapack.EigenvaluesAndSchur, then on +// return +// +// (initial H) U = U (final H), (*) +// +// where U is an orthogonal matrix. The final H is upper Hessenberg and +// H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular. +// +// If unconverged > 0 and compz == lapack.SchurOrig, then on return +// +// (final Z) = (initial Z) U, +// +// where U is the orthogonal matrix in (*) regardless of the value of job. +// +// If unconverged > 0 and compz == lapack.SchurHess, then on return +// +// (final Z) = U, +// +// where U is the orthogonal matrix in (*) regardless of the value of job. +// +// References: +// +// [1] R. Byers. LAPACK 3.1 xHSEQR: Tuning and Implementation Notes on the +// Small Bulge Multi-Shift QR Algorithm with Aggressive Early Deflation. +// LAPACK Working Note 187 (2007) +// URL: http://www.netlib.org/lapack/lawnspdf/lawn187.pdf +// [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I: +// Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix +// Anal. Appl. 23(4) (2002), pp. 929—947 +// URL: http://dx.doi.org/10.1137/S0895479801384573 +// [3] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II: +// Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973 +// URL: http://dx.doi.org/10.1137/S0895479801384585 +// +// Dhseqr is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dhseqr(job lapack.SchurJob, compz lapack.SchurComp, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, z []float64, ldz int, work []float64, lwork int) (unconverged int) { + wantt := job == lapack.EigenvaluesAndSchur + wantz := compz == lapack.SchurHess || compz == lapack.SchurOrig + + switch { + case job != lapack.EigenvaluesOnly && job != lapack.EigenvaluesAndSchur: + panic(badSchurJob) + case compz != lapack.SchurNone && compz != lapack.SchurHess && compz != lapack.SchurOrig: + panic(badSchurComp) + case n < 0: + panic(nLT0) + case ilo < 0 || max(0, n-1) < ilo: + panic(badIlo) + case ihi < min(ilo, n-1) || n <= ihi: + panic(badIhi) + case ldh < max(1, n): + panic(badLdH) + case ldz < 1, wantz && ldz < n: + panic(badLdZ) + case lwork < max(1, n) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. + if n == 0 { + work[0] = 1 + return 0 + } + + // Quick return in case of a workspace query. + if lwork == -1 { + impl.Dlaqr04(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, ilo, ihi, z, ldz, work, -1, 1) + work[0] = math.Max(float64(n), work[0]) + return 0 + } + + switch { + case len(h) < (n-1)*ldh+n: + panic(shortH) + case wantz && len(z) < (n-1)*ldz+n: + panic(shortZ) + case len(wr) < n: + panic(shortWr) + case len(wi) < n: + panic(shortWi) + } + + const ( + // Matrices of order ntiny or smaller must be processed by + // Dlahqr because of insufficient subdiagonal scratch space. + // This is a hard limit. 
+		ntiny = 15
+
+		// nl is the size of a local workspace to help small matrices
+		// through a rare Dlahqr failure. nl > ntiny is required and
+		// nl <= nmin = Ilaenv(ispec=12,...) is recommended (the default
+		// value of nmin is 75). Using nl = 49 allows up to six
+		// simultaneous shifts and a 16×16 deflation window.
+		nl = 49
+	)
+
+	// Copy eigenvalues isolated by Dgebal.
+	for i := 0; i < ilo; i++ {
+		wr[i] = h[i*ldh+i]
+		wi[i] = 0
+	}
+	for i := ihi + 1; i < n; i++ {
+		wr[i] = h[i*ldh+i]
+		wi[i] = 0
+	}
+
+	// Initialize Z to identity matrix if requested.
+	if compz == lapack.SchurHess {
+		impl.Dlaset(blas.All, n, n, 0, 1, z, ldz)
+	}
+
+	// Quick return if possible.
+	if ilo == ihi {
+		wr[ilo] = h[ilo*ldh+ilo]
+		wi[ilo] = 0
+		return 0
+	}
+
+	// Dlahqr/Dlaqr04 crossover point.
+	nmin := impl.Ilaenv(12, "DHSEQR", string(job)+string(compz), n, ilo, ihi, lwork)
+	nmin = max(ntiny, nmin)
+
+	if n > nmin {
+		// Dlaqr0 for big matrices.
+		unconverged = impl.Dlaqr04(wantt, wantz, n, ilo, ihi, h, ldh, wr[:ihi+1], wi[:ihi+1],
+			ilo, ihi, z, ldz, work, lwork, 1)
+	} else {
+		// Dlahqr for small matrices.
+		unconverged = impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr[:ihi+1], wi[:ihi+1],
+			ilo, ihi, z, ldz)
+		if unconverged > 0 {
+			// A rare Dlahqr failure! Dlaqr04 sometimes succeeds
+			// when Dlahqr fails.
+			kbot := unconverged
+			if n >= nl {
+				// Larger matrices have enough subdiagonal
+				// scratch space to call Dlaqr04 directly.
+				unconverged = impl.Dlaqr04(wantt, wantz, n, ilo, kbot, h, ldh,
+					wr[:ihi+1], wi[:ihi+1], ilo, ihi, z, ldz, work, lwork, 1)
+			} else {
+				// Tiny matrices don't have enough subdiagonal
+				// scratch space to benefit from Dlaqr04. Hence,
+				// tiny matrices must be copied into a larger
+				// array before calling Dlaqr04.
+				var hl [nl * nl]float64
+				impl.Dlacpy(blas.All, n, n, h, ldh, hl[:], nl)
+				impl.Dlaset(blas.All, nl, nl-n, 0, 0, hl[n:], nl)
+				var workl [nl]float64
+				unconverged = impl.Dlaqr04(wantt, wantz, nl, ilo, kbot, hl[:], nl,
+					wr[:ihi+1], wi[:ihi+1], ilo, ihi, z, ldz, workl[:], nl, 1)
+				work[0] = workl[0]
+				if wantt || unconverged > 0 {
+					impl.Dlacpy(blas.All, n, n, hl[:], nl, h, ldh)
+				}
+			}
+		}
+	}
+	// Zero out under the first subdiagonal, if necessary.
+	if (wantt || unconverged > 0) && n > 2 {
+		impl.Dlaset(blas.Lower, n-2, n-2, 0, 0, h[2*ldh:], ldh)
+	}
+
+	work[0] = math.Max(float64(n), work[0])
+	return unconverged
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlabrd.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlabrd.go
new file mode 100644
index 0000000000..396242cc2d
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlabrd.go
@@ -0,0 +1,183 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlabrd reduces the first nb rows and columns of a real general m×n matrix
+// A to upper or lower bidiagonal form by an orthogonal transformation
+//
+//	Qᵀ * A * P
+//
+// If m >= n, A is reduced to upper bidiagonal form and upon exit the elements
+// on and below the diagonal in the first nb columns represent the elementary
+// reflectors, and the elements above the diagonal in the first nb rows represent
+// the matrix P. If m < n, A is reduced to lower bidiagonal form and the elements
+// of P are instead stored above the diagonal.
+//
+// The reduction to bidiagonal form is stored in d and e, where d are the diagonal
+// elements, and e are the off-diagonal elements.
+//
+// The matrices Q and P are products of elementary reflectors
+//
+//	Q = H_0 * H_1 * ... * H_{nb-1}
+//	P = G_0 * G_1 * ... * G_{nb-1}
+//
+// where
+//
+//	H_i = I - tauQ[i] * v_i * v_iᵀ
+//	G_i = I - tauP[i] * u_i * u_iᵀ
+//
+// As an example, on exit the entries of A when m = 6, n = 5, and nb = 2
+//
+//	[ 1   1  u1  u1  u1]
+//	[v1   1   1  u2  u2]
+//	[v1  v2   a   a   a]
+//	[v1  v2   a   a   a]
+//	[v1  v2   a   a   a]
+//	[v1  v2   a   a   a]
+//
+// and when m = 5, n = 6, and nb = 2
+//
+//	[ 1  u1  u1  u1  u1  u1]
+//	[ 1   1  u2  u2  u2  u2]
+//	[v1   1   a   a   a   a]
+//	[v1  v2   a   a   a   a]
+//	[v1  v2   a   a   a   a]
+//
+// Dlabrd also returns the matrices X and Y which are used with U and V to
+// apply the transformation to the unreduced part of the matrix
+//
+//	A := A - V*Yᵀ - X*Uᵀ
+//
+// X is an m×nb matrix, Y is an n×nb matrix. d, e, tauP, and tauQ must all have
+// length at least nb. Dlabrd will panic if these size constraints are violated.
+//
+// Dlabrd is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlabrd(m, n, nb int, a []float64, lda int, d, e, tauQ, tauP, x []float64, ldx int, y []float64, ldy int) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case nb < 0:
+		panic(nbLT0)
+	case nb > n:
+		panic(nbGTN)
+	case nb > m:
+		panic(nbGTM)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldx < max(1, nb):
+		panic(badLdX)
+	case ldy < max(1, nb):
+		panic(badLdY)
+	}
+
+	if m == 0 || n == 0 || nb == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(d) < nb:
+		panic(shortD)
+	case len(e) < nb:
+		panic(shortE)
+	case len(tauQ) < nb:
+		panic(shortTauQ)
+	case len(tauP) < nb:
+		panic(shortTauP)
+	case len(x) < (m-1)*ldx+nb:
+		panic(shortX)
+	case len(y) < (n-1)*ldy+nb:
+		panic(shortY)
+	}
+
+	bi := blas64.Implementation()
+
+	if m >= n {
+		// Reduce to upper bidiagonal form.
+		for i := 0; i < nb; i++ {
+			bi.Dgemv(blas.NoTrans, m-i, i, -1, a[i*lda:], lda, y[i*ldy:], 1, 1, a[i*lda+i:], lda)
+			bi.Dgemv(blas.NoTrans, m-i, i, -1, x[i*ldx:], ldx, a[i:], lda, 1, a[i*lda+i:], lda)
+
+			a[i*lda+i], tauQ[i] = impl.Dlarfg(m-i, a[i*lda+i], a[min(i+1, m-1)*lda+i:], lda)
+			d[i] = a[i*lda+i]
+			if i < n-1 {
+				// Compute Y[i+1:n, i].
+				a[i*lda+i] = 1
+				bi.Dgemv(blas.Trans, m-i, n-i-1, 1, a[i*lda+i+1:], lda, a[i*lda+i:], lda, 0, y[(i+1)*ldy+i:], ldy)
+				bi.Dgemv(blas.Trans, m-i, i, 1, a[i*lda:], lda, a[i*lda+i:], lda, 0, y[i:], ldy)
+				bi.Dgemv(blas.NoTrans, n-i-1, i, -1, y[(i+1)*ldy:], ldy, y[i:], ldy, 1, y[(i+1)*ldy+i:], ldy)
+				bi.Dgemv(blas.Trans, m-i, i, 1, x[i*ldx:], ldx, a[i*lda+i:], lda, 0, y[i:], ldy)
+				bi.Dgemv(blas.Trans, i, n-i-1, -1, a[i+1:], lda, y[i:], ldy, 1, y[(i+1)*ldy+i:], ldy)
+				bi.Dscal(n-i-1, tauQ[i], y[(i+1)*ldy+i:], ldy)
+
+				// Update A[i, i+1:n].
+				bi.Dgemv(blas.NoTrans, n-i-1, i+1, -1, y[(i+1)*ldy:], ldy, a[i*lda:], 1, 1, a[i*lda+i+1:], 1)
+				bi.Dgemv(blas.Trans, i, n-i-1, -1, a[i+1:], lda, x[i*ldx:], 1, 1, a[i*lda+i+1:], 1)
+
+				// Generate reflection P[i] to annihilate A[i, i+2:n].
+				a[i*lda+i+1], tauP[i] = impl.Dlarfg(n-i-1, a[i*lda+i+1], a[i*lda+min(i+2, n-1):], 1)
+				e[i] = a[i*lda+i+1]
+				a[i*lda+i+1] = 1
+
+				// Compute X[i+1:m, i].
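+				// (The Dgemv sequence below accumulates column i of X from A, Y, and the current X, and the final Dscal applies tauP[i].)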
+ bi.Dgemv(blas.NoTrans, m-i-1, n-i-1, 1, a[(i+1)*lda+i+1:], lda, a[i*lda+i+1:], 1, 0, x[(i+1)*ldx+i:], ldx) + bi.Dgemv(blas.Trans, n-i-1, i+1, 1, y[(i+1)*ldy:], ldy, a[i*lda+i+1:], 1, 0, x[i:], ldx) + bi.Dgemv(blas.NoTrans, m-i-1, i+1, -1, a[(i+1)*lda:], lda, x[i:], ldx, 1, x[(i+1)*ldx+i:], ldx) + bi.Dgemv(blas.NoTrans, i, n-i-1, 1, a[i+1:], lda, a[i*lda+i+1:], 1, 0, x[i:], ldx) + bi.Dgemv(blas.NoTrans, m-i-1, i, -1, x[(i+1)*ldx:], ldx, x[i:], ldx, 1, x[(i+1)*ldx+i:], ldx) + bi.Dscal(m-i-1, tauP[i], x[(i+1)*ldx+i:], ldx) + } + } + return + } + // Reduce to lower bidiagonal form. + for i := 0; i < nb; i++ { + // Update A[i,i:n] + bi.Dgemv(blas.NoTrans, n-i, i, -1, y[i*ldy:], ldy, a[i*lda:], 1, 1, a[i*lda+i:], 1) + bi.Dgemv(blas.Trans, i, n-i, -1, a[i:], lda, x[i*ldx:], 1, 1, a[i*lda+i:], 1) + + // Generate reflection P[i] to annihilate A[i, i+1:n] + a[i*lda+i], tauP[i] = impl.Dlarfg(n-i, a[i*lda+i], a[i*lda+min(i+1, n-1):], 1) + d[i] = a[i*lda+i] + if i < m-1 { + a[i*lda+i] = 1 + // Compute X[i+1:m, i]. + bi.Dgemv(blas.NoTrans, m-i-1, n-i, 1, a[(i+1)*lda+i:], lda, a[i*lda+i:], 1, 0, x[(i+1)*ldx+i:], ldx) + bi.Dgemv(blas.Trans, n-i, i, 1, y[i*ldy:], ldy, a[i*lda+i:], 1, 0, x[i:], ldx) + bi.Dgemv(blas.NoTrans, m-i-1, i, -1, a[(i+1)*lda:], lda, x[i:], ldx, 1, x[(i+1)*ldx+i:], ldx) + bi.Dgemv(blas.NoTrans, i, n-i, 1, a[i:], lda, a[i*lda+i:], 1, 0, x[i:], ldx) + bi.Dgemv(blas.NoTrans, m-i-1, i, -1, x[(i+1)*ldx:], ldx, x[i:], ldx, 1, x[(i+1)*ldx+i:], ldx) + bi.Dscal(m-i-1, tauP[i], x[(i+1)*ldx+i:], ldx) + + // Update A[i+1:m, i]. + bi.Dgemv(blas.NoTrans, m-i-1, i, -1, a[(i+1)*lda:], lda, y[i*ldy:], 1, 1, a[(i+1)*lda+i:], lda) + bi.Dgemv(blas.NoTrans, m-i-1, i+1, -1, x[(i+1)*ldx:], ldx, a[i:], lda, 1, a[(i+1)*lda+i:], lda) + + // Generate reflection Q[i] to annihilate A[i+2:m, i]. + a[(i+1)*lda+i], tauQ[i] = impl.Dlarfg(m-i-1, a[(i+1)*lda+i], a[min(i+2, m-1)*lda+i:], lda) + e[i] = a[(i+1)*lda+i] + a[(i+1)*lda+i] = 1 + + // Compute Y[i+1:n, i]. + bi.Dgemv(blas.Trans, m-i-1, n-i-1, 1, a[(i+1)*lda+i+1:], lda, a[(i+1)*lda+i:], lda, 0, y[(i+1)*ldy+i:], ldy) + bi.Dgemv(blas.Trans, m-i-1, i, 1, a[(i+1)*lda:], lda, a[(i+1)*lda+i:], lda, 0, y[i:], ldy) + bi.Dgemv(blas.NoTrans, n-i-1, i, -1, y[(i+1)*ldy:], ldy, y[i:], ldy, 1, y[(i+1)*ldy+i:], ldy) + bi.Dgemv(blas.Trans, m-i-1, i+1, 1, x[(i+1)*ldx:], ldx, a[(i+1)*lda+i:], lda, 0, y[i:], ldy) + bi.Dgemv(blas.Trans, i+1, n-i-1, -1, a[i+1:], lda, y[i:], ldy, 1, y[(i+1)*ldy+i:], ldy) + bi.Dscal(n-i-1, tauQ[i], y[(i+1)*ldy+i:], ldy) + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlacn2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlacn2.go new file mode 100644 index 0000000000..cd6cf719d5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlacn2.go @@ -0,0 +1,136 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlacn2 estimates the 1-norm of an n×n matrix A using sequential updates with +// matrix-vector products provided externally. +// +// Dlacn2 is called sequentially and it returns the value of est and kase to be +// used on the next call. +// On the initial call, kase must be 0. +// In between calls, x must be overwritten by +// +// A * X if kase was returned as 1, +// Aᵀ * X if kase was returned as 2, +// +// and all other parameters must not be changed. 
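+//
+// A typical reverse-communication loop looks like this sketch, where v, x and
+// isgn have length n and mulVec (a hypothetical caller-supplied helper, named
+// here only for illustration) overwrites x as requested:
+//
+//	var isave [3]int
+//	var est float64
+//	kase := 0
+//	for {
+//		est, kase = impl.Dlacn2(n, v, x, isgn, est, kase, &isave)
+//		if kase == 0 {
+//			break
+//		}
+//		mulVec(kase, x) // kase == 1: x = A*x;  kase == 2: x = Aᵀ*x.
+//	}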
+// On the final return, kase is returned as 0, v contains A*W where W is a +// vector, and est = norm(V)/norm(W) is a lower bound for 1-norm of A. +// +// v, x, and isgn must all have length n and n must be at least 1, otherwise +// Dlacn2 will panic. isave is used for temporary storage. +// +// Dlacn2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlacn2(n int, v, x []float64, isgn []int, est float64, kase int, isave *[3]int) (float64, int) { + switch { + case n < 1: + panic(nLT1) + case len(v) < n: + panic(shortV) + case len(x) < n: + panic(shortX) + case len(isgn) < n: + panic(shortIsgn) + case isave[0] < 0 || 5 < isave[0]: + panic(badIsave) + case isave[0] == 0 && kase != 0: + panic(badIsave) + } + + const itmax = 5 + bi := blas64.Implementation() + + if kase == 0 { + for i := 0; i < n; i++ { + x[i] = 1 / float64(n) + } + kase = 1 + isave[0] = 1 + return est, kase + } + switch isave[0] { + case 1: + if n == 1 { + v[0] = x[0] + est = math.Abs(v[0]) + kase = 0 + return est, kase + } + est = bi.Dasum(n, x, 1) + for i := 0; i < n; i++ { + x[i] = math.Copysign(1, x[i]) + isgn[i] = int(x[i]) + } + kase = 2 + isave[0] = 2 + return est, kase + case 2: + isave[1] = bi.Idamax(n, x, 1) + isave[2] = 2 + for i := 0; i < n; i++ { + x[i] = 0 + } + x[isave[1]] = 1 + kase = 1 + isave[0] = 3 + return est, kase + case 3: + bi.Dcopy(n, x, 1, v, 1) + estold := est + est = bi.Dasum(n, v, 1) + sameSigns := true + for i := 0; i < n; i++ { + if int(math.Copysign(1, x[i])) != isgn[i] { + sameSigns = false + break + } + } + if !sameSigns && est > estold { + for i := 0; i < n; i++ { + x[i] = math.Copysign(1, x[i]) + isgn[i] = int(x[i]) + } + kase = 2 + isave[0] = 4 + return est, kase + } + case 4: + jlast := isave[1] + isave[1] = bi.Idamax(n, x, 1) + if x[jlast] != math.Abs(x[isave[1]]) && isave[2] < itmax { + isave[2] += 1 + for i := 0; i < n; i++ { + x[i] = 0 + } + x[isave[1]] = 1 + kase = 1 + isave[0] = 3 + return est, kase + } + case 5: + tmp := 2 * (bi.Dasum(n, x, 1)) / float64(3*n) + if tmp > est { + bi.Dcopy(n, x, 1, v, 1) + est = tmp + } + kase = 0 + return est, kase + } + // Iteration complete. Final stage + altsgn := 1.0 + for i := 0; i < n; i++ { + x[i] = altsgn * (1 + float64(i)/float64(n-1)) + altsgn *= -1 + } + kase = 1 + isave[0] = 5 + return est, kase +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlacpy.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlacpy.go new file mode 100644 index 0000000000..793bb8c7ca --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlacpy.go @@ -0,0 +1,59 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dlacpy copies the elements of A specified by uplo into B. Uplo can specify +// a triangular portion with blas.Upper or blas.Lower, or can specify all of the +// elements with blas.All. +// +// Dlacpy is an internal routine. It is exported for testing purposes. 
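+//
+// For example, with uplo == blas.Upper and m = n = 2, only the elements
+// A[0,0], A[0,1] and A[1,1] are copied to B; B[1,0] is left untouched.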
+func (impl Implementation) Dlacpy(uplo blas.Uplo, m, n int, a []float64, lda int, b []float64, ldb int) {
+	switch {
+	case uplo != blas.Upper && uplo != blas.Lower && uplo != blas.All:
+		panic(badUplo)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldb < max(1, n):
+		panic(badLdB)
+	}
+
+	if m == 0 || n == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(b) < (m-1)*ldb+n:
+		panic(shortB)
+	}
+
+	switch uplo {
+	case blas.Upper:
+		for i := 0; i < m; i++ {
+			for j := i; j < n; j++ {
+				b[i*ldb+j] = a[i*lda+j]
+			}
+		}
+	case blas.Lower:
+		for i := 0; i < m; i++ {
+			for j := 0; j < min(i+1, n); j++ {
+				b[i*ldb+j] = a[i*lda+j]
+			}
+		}
+	case blas.All:
+		for i := 0; i < m; i++ {
+			for j := 0; j < n; j++ {
+				b[i*ldb+j] = a[i*lda+j]
+			}
+		}
+	}
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlae2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlae2.go
new file mode 100644
index 0000000000..2eda3a18fe
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlae2.go
@@ -0,0 +1,51 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlae2 computes the eigenvalues of a 2×2 symmetric matrix
+//
+//	[a b]
+//	[b c]
+//
+// and returns the eigenvalue with the larger absolute value as rt1 and the
+// smaller as rt2.
+//
+// Dlae2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlae2(a, b, c float64) (rt1, rt2 float64) {
+	sm := a + c
+	df := a - c
+	adf := math.Abs(df)
+	tb := b + b
+	ab := math.Abs(tb)
+	acmx := c
+	acmn := a
+	if math.Abs(a) > math.Abs(c) {
+		acmx = a
+		acmn = c
+	}
+	var rt float64
+	if adf > ab {
+		rt = adf * math.Sqrt(1+(ab/adf)*(ab/adf))
+	} else if adf < ab {
+		rt = ab * math.Sqrt(1+(adf/ab)*(adf/ab))
+	} else {
+		rt = ab * math.Sqrt2
+	}
+	if sm < 0 {
+		rt1 = 0.5 * (sm - rt)
+		rt2 = (acmx/rt1)*acmn - (b/rt1)*b
+		return rt1, rt2
+	}
+	if sm > 0 {
+		rt1 = 0.5 * (sm + rt)
+		rt2 = (acmx/rt1)*acmn - (b/rt1)*b
+		return rt1, rt2
+	}
+	rt1 = 0.5 * rt
+	rt2 = -0.5 * rt
+	return rt1, rt2
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaev2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaev2.go
new file mode 100644
index 0000000000..56923f51d3
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaev2.go
@@ -0,0 +1,85 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlaev2 computes the eigendecomposition of a symmetric 2×2 matrix.
+// The matrix is given by
+//
+//	[a b]
+//	[b c]
+//
+// Dlaev2 returns rt1 and rt2, the eigenvalues of the matrix where |rt1| > |rt2|,
+// and [cs1, sn1] which is the unit right eigenvector for rt1.
+//
+//	[ cs1 sn1] [a b] [cs1 -sn1] = [rt1   0]
+//	[-sn1 cs1] [b c] [sn1  cs1]   [  0 rt2]
+//
+// Dlaev2 is an internal routine. It is exported for testing purposes.
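+//
+// For example, for the matrix
+//
+//	[2 1]
+//	[1 2]
+//
+// Dlaev2 returns rt1 = 3, rt2 = 1 and cs1 = sn1 = 1/√2, corresponding to the
+// normalized eigenvector [1, 1]/√2 of the eigenvalue 3.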
+func (impl Implementation) Dlaev2(a, b, c float64) (rt1, rt2, cs1, sn1 float64) {
+	sm := a + c
+	df := a - c
+	adf := math.Abs(df)
+	tb := b + b
+	ab := math.Abs(tb)
+	acmx := c
+	acmn := a
+	if math.Abs(a) > math.Abs(c) {
+		acmx = a
+		acmn = c
+	}
+	var rt float64
+	if adf > ab {
+		rt = adf * math.Sqrt(1+(ab/adf)*(ab/adf))
+	} else if adf < ab {
+		rt = ab * math.Sqrt(1+(adf/ab)*(adf/ab))
+	} else {
+		rt = ab * math.Sqrt(2)
+	}
+	var sgn1 float64
+	if sm < 0 {
+		rt1 = 0.5 * (sm - rt)
+		sgn1 = -1
+		rt2 = (acmx/rt1)*acmn - (b/rt1)*b
+	} else if sm > 0 {
+		rt1 = 0.5 * (sm + rt)
+		sgn1 = 1
+		rt2 = (acmx/rt1)*acmn - (b/rt1)*b
+	} else {
+		rt1 = 0.5 * rt
+		rt2 = -0.5 * rt
+		sgn1 = 1
+	}
+	var cs, sgn2 float64
+	if df >= 0 {
+		cs = df + rt
+		sgn2 = 1
+	} else {
+		cs = df - rt
+		sgn2 = -1
+	}
+	acs := math.Abs(cs)
+	if acs > ab {
+		ct := -tb / cs
+		sn1 = 1 / math.Sqrt(1+ct*ct)
+		cs1 = ct * sn1
+	} else {
+		if ab == 0 {
+			cs1 = 1
+			sn1 = 0
+		} else {
+			tn := -cs / tb
+			cs1 = 1 / math.Sqrt(1+tn*tn)
+			sn1 = tn * cs1
+		}
+	}
+	if sgn1 == sgn2 {
+		tn := cs1
+		cs1 = -sn1
+		sn1 = tn
+	}
+	return rt1, rt2, cs1, sn1
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaexc.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaexc.go
new file mode 100644
index 0000000000..2b79bd8ae7
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaexc.go
@@ -0,0 +1,269 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlaexc swaps two adjacent diagonal blocks of order 1 or 2 in an n×n upper
+// quasi-triangular matrix T by an orthogonal similarity transformation.
+//
+// T must be in Schur canonical form, that is, block upper triangular with 1×1
+// and 2×2 diagonal blocks; each 2×2 diagonal block has its diagonal elements
+// equal and its off-diagonal elements of opposite sign. On return, T will
+// contain the updated matrix again in Schur canonical form.
+//
+// If wantq is true, the transformation is accumulated in the n×n matrix Q,
+// otherwise Q is not referenced.
+//
+// j1 is the index of the first row of the first block. n1 and n2 are the order
+// of the first and second block, respectively.
+//
+// work must have length at least n, otherwise Dlaexc will panic.
+//
+// If ok is false, the transformed matrix T would be too far from Schur form.
+// The blocks are not swapped, and T and Q are not modified.
+//
+// If n1 and n2 are both equal to 1, Dlaexc will always return true.
+//
+// Dlaexc is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlaexc(wantq bool, n int, t []float64, ldt int, q []float64, ldq int, j1, n1, n2 int, work []float64) (ok bool) {
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case ldt < max(1, n):
+		panic(badLdT)
+	case wantq && ldq < max(1, n):
+		panic(badLdQ)
+	case j1 < 0 || n <= j1:
+		panic(badJ1)
+	case len(work) < n:
+		panic(shortWork)
+	case n1 < 0 || 2 < n1:
+		panic(badN1)
+	case n2 < 0 || 2 < n2:
+		panic(badN2)
+	}
+
+	if n == 0 || n1 == 0 || n2 == 0 {
+		return true
+	}
+
+	switch {
+	case len(t) < (n-1)*ldt+n:
+		panic(shortT)
+	case wantq && len(q) < (n-1)*ldq+n:
+		panic(shortQ)
+	}
+
+	if j1+n1 >= n {
+		// TODO(vladimir-ch): Reference LAPACK does this check whether
+		// the start of the second block is in the matrix T.
It returns + // true if it is not and moreover it does not check whether the + // whole second block fits into T. This does not feel + // satisfactory. The only caller of Dlaexc is Dtrexc, so if the + // caller makes sure that this does not happen, we could be + // stricter here. + return true + } + + j2 := j1 + 1 + j3 := j1 + 2 + + bi := blas64.Implementation() + + if n1 == 1 && n2 == 1 { + // Swap two 1×1 blocks. + t11 := t[j1*ldt+j1] + t22 := t[j2*ldt+j2] + + // Determine the transformation to perform the interchange. + cs, sn, _ := impl.Dlartg(t[j1*ldt+j2], t22-t11) + + // Apply transformation to the matrix T. + if n-j3 > 0 { + bi.Drot(n-j3, t[j1*ldt+j3:], 1, t[j2*ldt+j3:], 1, cs, sn) + } + if j1 > 0 { + bi.Drot(j1, t[j1:], ldt, t[j2:], ldt, cs, sn) + } + + t[j1*ldt+j1] = t22 + t[j2*ldt+j2] = t11 + + if wantq { + // Accumulate transformation in the matrix Q. + bi.Drot(n, q[j1:], ldq, q[j2:], ldq, cs, sn) + } + + return true + } + + // Swapping involves at least one 2×2 block. + // + // Copy the diagonal block of order n1+n2 to the local array d and + // compute its norm. + nd := n1 + n2 + var d [16]float64 + const ldd = 4 + impl.Dlacpy(blas.All, nd, nd, t[j1*ldt+j1:], ldt, d[:], ldd) + dnorm := impl.Dlange(lapack.MaxAbs, nd, nd, d[:], ldd, work) + + // Compute machine-dependent threshold for test for accepting swap. + eps := dlamchP + thresh := math.Max(10*eps*dnorm, dlamchS/eps) + + // Solve T11*X - X*T22 = scale*T12 for X. + var x [4]float64 + const ldx = 2 + scale, _, _ := impl.Dlasy2(false, false, -1, n1, n2, d[:], ldd, d[n1*ldd+n1:], ldd, d[n1:], ldd, x[:], ldx) + + // Swap the adjacent diagonal blocks. + switch { + case n1 == 1 && n2 == 2: + // Generate elementary reflector H so that + // ( scale, X11, X12 ) H = ( 0, 0, * ) + u := [3]float64{scale, x[0], 1} + _, tau := impl.Dlarfg(3, x[1], u[:2], 1) + t11 := t[j1*ldt+j1] + + // Perform swap provisionally on diagonal block in d. + impl.Dlarfx(blas.Left, 3, 3, u[:], tau, d[:], ldd, work) + impl.Dlarfx(blas.Right, 3, 3, u[:], tau, d[:], ldd, work) + + // Test whether to reject swap. + if math.Max(math.Abs(d[2*ldd]), math.Max(math.Abs(d[2*ldd+1]), math.Abs(d[2*ldd+2]-t11))) > thresh { + return false + } + + // Accept swap: apply transformation to the entire matrix T. + impl.Dlarfx(blas.Left, 3, n-j1, u[:], tau, t[j1*ldt+j1:], ldt, work) + impl.Dlarfx(blas.Right, j2+1, 3, u[:], tau, t[j1:], ldt, work) + + t[j3*ldt+j1] = 0 + t[j3*ldt+j2] = 0 + t[j3*ldt+j3] = t11 + + if wantq { + // Accumulate transformation in the matrix Q. + impl.Dlarfx(blas.Right, n, 3, u[:], tau, q[j1:], ldq, work) + } + + case n1 == 2 && n2 == 1: + // Generate elementary reflector H so that: + // H ( -X11 ) = ( * ) + // ( -X21 ) = ( 0 ) + // ( scale ) = ( 0 ) + u := [3]float64{1, -x[ldx], scale} + _, tau := impl.Dlarfg(3, -x[0], u[1:], 1) + t33 := t[j3*ldt+j3] + + // Perform swap provisionally on diagonal block in D. + impl.Dlarfx(blas.Left, 3, 3, u[:], tau, d[:], ldd, work) + impl.Dlarfx(blas.Right, 3, 3, u[:], tau, d[:], ldd, work) + + // Test whether to reject swap. + if math.Max(math.Abs(d[ldd]), math.Max(math.Abs(d[2*ldd]), math.Abs(d[0]-t33))) > thresh { + return false + } + + // Accept swap: apply transformation to the entire matrix T. + impl.Dlarfx(blas.Right, j3+1, 3, u[:], tau, t[j1:], ldt, work) + impl.Dlarfx(blas.Left, 3, n-j1-1, u[:], tau, t[j1*ldt+j2:], ldt, work) + + t[j1*ldt+j1] = t33 + t[j2*ldt+j1] = 0 + t[j3*ldt+j1] = 0 + + if wantq { + // Accumulate transformation in the matrix Q. 
+ impl.Dlarfx(blas.Right, n, 3, u[:], tau, q[j1:], ldq, work) + } + + default: // n1 == 2 && n2 == 2 + // Generate elementary reflectors H_1 and H_2 so that: + // H_2 H_1 ( -X11 -X12 ) = ( * * ) + // ( -X21 -X22 ) ( 0 * ) + // ( scale 0 ) ( 0 0 ) + // ( 0 scale ) ( 0 0 ) + u1 := [3]float64{1, -x[ldx], scale} + _, tau1 := impl.Dlarfg(3, -x[0], u1[1:], 1) + + temp := -tau1 * (x[1] + u1[1]*x[ldx+1]) + u2 := [3]float64{1, -temp * u1[2], scale} + _, tau2 := impl.Dlarfg(3, -temp*u1[1]-x[ldx+1], u2[1:], 1) + + // Perform swap provisionally on diagonal block in D. + impl.Dlarfx(blas.Left, 3, 4, u1[:], tau1, d[:], ldd, work) + impl.Dlarfx(blas.Right, 4, 3, u1[:], tau1, d[:], ldd, work) + impl.Dlarfx(blas.Left, 3, 4, u2[:], tau2, d[ldd:], ldd, work) + impl.Dlarfx(blas.Right, 4, 3, u2[:], tau2, d[1:], ldd, work) + + // Test whether to reject swap. + m1 := math.Max(math.Abs(d[2*ldd]), math.Abs(d[2*ldd+1])) + m2 := math.Max(math.Abs(d[3*ldd]), math.Abs(d[3*ldd+1])) + if math.Max(m1, m2) > thresh { + return false + } + + // Accept swap: apply transformation to the entire matrix T. + j4 := j1 + 3 + impl.Dlarfx(blas.Left, 3, n-j1, u1[:], tau1, t[j1*ldt+j1:], ldt, work) + impl.Dlarfx(blas.Right, j4+1, 3, u1[:], tau1, t[j1:], ldt, work) + impl.Dlarfx(blas.Left, 3, n-j1, u2[:], tau2, t[j2*ldt+j1:], ldt, work) + impl.Dlarfx(blas.Right, j4+1, 3, u2[:], tau2, t[j2:], ldt, work) + + t[j3*ldt+j1] = 0 + t[j3*ldt+j2] = 0 + t[j4*ldt+j1] = 0 + t[j4*ldt+j2] = 0 + + if wantq { + // Accumulate transformation in the matrix Q. + impl.Dlarfx(blas.Right, n, 3, u1[:], tau1, q[j1:], ldq, work) + impl.Dlarfx(blas.Right, n, 3, u2[:], tau2, q[j2:], ldq, work) + } + } + + if n2 == 2 { + // Standardize new 2×2 block T11. + a, b := t[j1*ldt+j1], t[j1*ldt+j2] + c, d := t[j2*ldt+j1], t[j2*ldt+j2] + var cs, sn float64 + t[j1*ldt+j1], t[j1*ldt+j2], t[j2*ldt+j1], t[j2*ldt+j2], _, _, _, _, cs, sn = impl.Dlanv2(a, b, c, d) + if n-j1-2 > 0 { + bi.Drot(n-j1-2, t[j1*ldt+j1+2:], 1, t[j2*ldt+j1+2:], 1, cs, sn) + } + if j1 > 0 { + bi.Drot(j1, t[j1:], ldt, t[j2:], ldt, cs, sn) + } + if wantq { + bi.Drot(n, q[j1:], ldq, q[j2:], ldq, cs, sn) + } + } + if n1 == 2 { + // Standardize new 2×2 block T22. + j3 := j1 + n2 + j4 := j3 + 1 + a, b := t[j3*ldt+j3], t[j3*ldt+j4] + c, d := t[j4*ldt+j3], t[j4*ldt+j4] + var cs, sn float64 + t[j3*ldt+j3], t[j3*ldt+j4], t[j4*ldt+j3], t[j4*ldt+j4], _, _, _, _, cs, sn = impl.Dlanv2(a, b, c, d) + if n-j3-2 > 0 { + bi.Drot(n-j3-2, t[j3*ldt+j3+2:], 1, t[j4*ldt+j3+2:], 1, cs, sn) + } + bi.Drot(j3, t[j3:], ldt, t[j4:], ldt, cs, sn) + if wantq { + bi.Drot(n, q[j3:], ldq, q[j4:], ldq, cs, sn) + } + } + + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlag2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlag2.go new file mode 100644 index 0000000000..cd644b65bb --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlag2.go @@ -0,0 +1,237 @@ +// Copyright ©2021 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlag2 computes the eigenvalues of a 2×2 generalized eigenvalue problem +// +// A - w*B +// +// where B is an upper triangular matrix. +// +// Dlag2 uses scaling as necessary to avoid over-/underflow. Scaling results in +// a modified eigenvalue problem +// +// s*A - w*B +// +// where s is a non-negative scaling factor chosen so that w, w*B, and s*A do +// not overflow and, if possible, do not underflow, either. 
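+//
+// In exact arithmetic the computed quantities satisfy, for nonzero scale1,
+//
+//	det(scale1*A - (wr1 + wi*i)*B) = 0,
+//
+// and, when the eigenvalues are real, det(scale2*A - wr2*B) = 0 as well.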
+// +// scale1 and scale2 are used to avoid over-/underflow in the eigenvalue +// equation which defines the first and second eigenvalue respectively. Note +// that scale1 and scale2 may be zero or less than the underflow threshold if +// the corresponding exact eigenvalue is sufficiently large. +// +// If the eigenvalues are real, then: +// - wi is zero, +// - the eigenvalues are wr1/scale1 and wr2/scale2. +// +// If the eigenvalues are complex, then: +// - wi is non-negative, +// - the eigenvalues are (wr1 ± wi*i)/scale1, +// - wr1 = wr2, +// - scale1 = scale2. +// +// Dlag2 assumes that the one-norm of A and B is less than 1/dlamchS. Entries of +// A less than sqrt(dlamchS)*norm(A) are subject to being treated as zero. The +// diagonals of B should be at least sqrt(dlamchS) times the largest element of +// B (in absolute value); if a diagonal is smaller than that, then +// ±sqrt(dlamchS) will be used instead of that diagonal. +// +// Dlag2 is an internal routine. It is exported for testing purposes. +func (Implementation) Dlag2(a []float64, lda int, b []float64, ldb int) (scale1, scale2, wr1, wr2, wi float64) { + switch { + case lda < 2: + panic(badLdA) + case ldb < 2: + panic(badLdB) + case len(a) < lda+2: + panic(shortA) + case len(b) < ldb+2: + panic(shortB) + } + + const ( + safmin = dlamchS + safmax = 1 / safmin + fuzzy1 = 1 + 1e-5 + ) + rtmin := math.Sqrt(safmin) + rtmax := 1 / rtmin + + // Scale A. + anorm := math.Max(math.Abs(a[0])+math.Abs(a[lda]), + math.Abs(a[1])+math.Abs(a[lda+1])) + anorm = math.Max(anorm, safmin) + ascale := 1 / anorm + a11 := ascale * a[0] + a21 := ascale * a[lda] + a12 := ascale * a[1] + a22 := ascale * a[lda+1] + + // Perturb B if necessary to insure non-singularity. + b11 := b[0] + b12 := b[1] + b22 := b[ldb+1] + bmin := rtmin * math.Max(math.Max(math.Abs(b11), math.Abs(b12)), + math.Max(math.Abs(b22), rtmin)) + if math.Abs(b11) < bmin { + b11 = math.Copysign(bmin, b11) + } + if math.Abs(b22) < bmin { + b22 = math.Copysign(bmin, b22) + } + + // Scale B. + bnorm := math.Max(math.Max(math.Abs(b11), math.Abs(b12)+math.Abs(b22)), safmin) + bsize := math.Max(math.Abs(b11), math.Abs(b22)) + bscale := 1 / bsize + b11 *= bscale + b12 *= bscale + b22 *= bscale + + // Compute larger eigenvalue by method described by C. van Loan. + var ( + as12, abi22 float64 + pp, qq, shift float64 + ) + binv11 := 1 / b11 + binv22 := 1 / b22 + s1 := a11 * binv11 + s2 := a22 * binv22 + // AS is A shifted by -shift*B. + if math.Abs(s1) <= math.Abs(s2) { + shift = s1 + as12 = a12 - shift*b12 + as22 := a22 - shift*b22 + ss := a21 * (binv11 * binv22) + abi22 = as22*binv22 - ss*b12 + pp = 0.5 * abi22 + qq = ss * as12 + } else { + shift = s2 + as12 = a12 - shift*b12 + as11 := a11 - shift*b11 + ss := a21 * (binv11 * binv22) + abi22 = -ss * b12 + pp = 0.5 * (as11*binv11 + abi22) + qq = ss * as12 + } + var discr, r float64 + if math.Abs(pp*rtmin) >= 1 { + tmp := rtmin * pp + discr = tmp*tmp + qq*safmin + r = math.Sqrt(math.Abs(discr)) * rtmax + } else { + pp2 := pp * pp + if pp2+math.Abs(qq) <= safmin { + tmp := rtmax * pp + discr = tmp*tmp + qq*safmax + r = math.Sqrt(math.Abs(discr)) * rtmin + } else { + discr = pp2 + qq + r = math.Sqrt(math.Abs(discr)) + } + } + + // TODO(vladimir-ch): Is the following comment from the reference needed in + // a Go implementation? + // + // Note: the test of r in the following `if` is to cover the case when discr + // is small and negative and is flushed to zero during the calculation of r. 
+ // On machines which have a consistent flush-to-zero threshold and handle + // numbers above that threshold correctly, it would not be necessary. + if discr >= 0 || r == 0 { + sum := pp + math.Copysign(r, pp) + diff := pp - math.Copysign(r, pp) + wbig := shift + sum + + // Compute smaller eigenvalue. + wsmall := shift + diff + if 0.5*math.Abs(wbig) > math.Max(math.Abs(wsmall), safmin) { + wdet := (a11*a22 - a12*a21) * (binv11 * binv22) + wsmall = wdet / wbig + } + // Choose (real) eigenvalue closest to 2,2 element of A*B^{-1} for wr1. + if pp > abi22 { + wr1 = math.Min(wbig, wsmall) + wr2 = math.Max(wbig, wsmall) + } else { + wr1 = math.Max(wbig, wsmall) + wr2 = math.Min(wbig, wsmall) + } + } else { + // Complex eigenvalues. + wr1 = shift + pp + wr2 = wr1 + wi = r + } + + // Further scaling to avoid underflow and overflow in computing + // scale1 and overflow in computing w*B. + // + // This scale factor (wscale) is bounded from above using c1 and c2, + // and from below using c3 and c4: + // - c1 implements the condition s*A must never overflow. + // - c2 implements the condition w*B must never overflow. + // - c3, with c2, implement the condition that s*A - w*B must never overflow. + // - c4 implements the condition s should not underflow. + // - c5 implements the condition max(s,|w|) should be at least 2. + c1 := bsize * (safmin * math.Max(1, ascale)) + c2 := safmin * math.Max(1, bnorm) + c3 := bsize * safmin + c4 := 1.0 + c5 := 1.0 + if ascale <= 1 || bsize <= 1 { + c5 = math.Min(1, ascale*bsize) + if ascale <= 1 && bsize <= 1 { + c4 = math.Min(1, (ascale/safmin)*bsize) + } + } + + // Scale first eigenvalue. + wabs := math.Abs(wr1) + math.Abs(wi) + wsize := math.Max(math.Max(safmin, c1), math.Max(fuzzy1*(wabs*c2+c3), + math.Min(c4, 0.5*math.Max(wabs, c5)))) + maxABsize := math.Max(ascale, bsize) + minABsize := math.Min(ascale, bsize) + if wsize != 1 { + wscale := 1 / wsize + if wsize > 1 { + scale1 = (maxABsize * wscale) * minABsize + } else { + scale1 = (minABsize * wscale) * maxABsize + } + wr1 *= wscale + if wi != 0 { + wi *= wscale + wr2 = wr1 + scale2 = scale1 + } + } else { + scale1 = ascale * bsize + scale2 = scale1 + } + + // Scale second eigenvalue if real. + if wi == 0 { + wsize = math.Max(math.Max(safmin, c1), math.Max(fuzzy1*(math.Abs(wr2)*c2+c3), + math.Min(c4, 0.5*math.Max(math.Abs(wr2), c5)))) + if wsize != 1 { + wscale := 1 / wsize + if wsize > 1 { + scale2 = (maxABsize * wscale) * minABsize + } else { + scale2 = (minABsize * wscale) * maxABsize + } + wr2 *= wscale + } else { + scale2 = ascale * bsize + } + } + + return scale1, scale2, wr1, wr2, wi +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlags2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlags2.go new file mode 100644 index 0000000000..7bd4f21970 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlags2.go @@ -0,0 +1,186 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlags2 computes 2-by-2 orthogonal matrices U, V and Q with the +// triangles of A and B specified by upper. +// +// If upper is true +// +// Uᵀ*A*Q = Uᵀ*[ a1 a2 ]*Q = [ x 0 ] +// [ 0 a3 ] [ x x ] +// +// and +// +// Vᵀ*B*Q = Vᵀ*[ b1 b2 ]*Q = [ x 0 ] +// [ 0 b3 ] [ x x ] +// +// otherwise +// +// Uᵀ*A*Q = Uᵀ*[ a1 0 ]*Q = [ x x ] +// [ a2 a3 ] [ 0 x ] +// +// and +// +// Vᵀ*B*Q = Vᵀ*[ b1 0 ]*Q = [ x x ] +// [ b2 b3 ] [ 0 x ]. 
+// +// The rows of the transformed A and B are parallel, where +// +// U = [ csu snu ], V = [ csv snv ], Q = [ csq snq ] +// [ -snu csu ] [ -snv csv ] [ -snq csq ] +// +// Dlags2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlags2(upper bool, a1, a2, a3, b1, b2, b3 float64) (csu, snu, csv, snv, csq, snq float64) { + if upper { + // Input matrices A and B are upper triangular matrices. + // + // Form matrix C = A*adj(B) = [ a b ] + // [ 0 d ] + a := a1 * b3 + d := a3 * b1 + b := a2*b1 - a1*b2 + + // The SVD of real 2-by-2 triangular C. + // + // [ csl -snl ]*[ a b ]*[ csr snr ] = [ r 0 ] + // [ snl csl ] [ 0 d ] [ -snr csr ] [ 0 t ] + _, _, snr, csr, snl, csl := impl.Dlasv2(a, b, d) + + if math.Abs(csl) >= math.Abs(snl) || math.Abs(csr) >= math.Abs(snr) { + // Compute the [0, 0] and [0, 1] elements of Uᵀ*A and Vᵀ*B, + // and [0, 1] element of |U|ᵀ*|A| and |V|ᵀ*|B|. + + ua11r := csl * a1 + ua12 := csl*a2 + snl*a3 + + vb11r := csr * b1 + vb12 := csr*b2 + snr*b3 + + aua12 := math.Abs(csl)*math.Abs(a2) + math.Abs(snl)*math.Abs(a3) + avb12 := math.Abs(csr)*math.Abs(b2) + math.Abs(snr)*math.Abs(b3) + + // Zero [0, 1] elements of Uᵀ*A and Vᵀ*B. + if math.Abs(ua11r)+math.Abs(ua12) != 0 { + if aua12/(math.Abs(ua11r)+math.Abs(ua12)) <= avb12/(math.Abs(vb11r)+math.Abs(vb12)) { + csq, snq, _ = impl.Dlartg(-ua11r, ua12) + } else { + csq, snq, _ = impl.Dlartg(-vb11r, vb12) + } + } else { + csq, snq, _ = impl.Dlartg(-vb11r, vb12) + } + + csu = csl + snu = -snl + csv = csr + snv = -snr + } else { + // Compute the [1, 0] and [1, 1] elements of Uᵀ*A and Vᵀ*B, + // and [1, 1] element of |U|ᵀ*|A| and |V|ᵀ*|B|. + + ua21 := -snl * a1 + ua22 := -snl*a2 + csl*a3 + + vb21 := -snr * b1 + vb22 := -snr*b2 + csr*b3 + + aua22 := math.Abs(snl)*math.Abs(a2) + math.Abs(csl)*math.Abs(a3) + avb22 := math.Abs(snr)*math.Abs(b2) + math.Abs(csr)*math.Abs(b3) + + // Zero [1, 1] elements of Uᵀ*A and Vᵀ*B, and then swap. + if math.Abs(ua21)+math.Abs(ua22) != 0 { + if aua22/(math.Abs(ua21)+math.Abs(ua22)) <= avb22/(math.Abs(vb21)+math.Abs(vb22)) { + csq, snq, _ = impl.Dlartg(-ua21, ua22) + } else { + csq, snq, _ = impl.Dlartg(-vb21, vb22) + } + } else { + csq, snq, _ = impl.Dlartg(-vb21, vb22) + } + + csu = snl + snu = csl + csv = snr + snv = csr + } + } else { + // Input matrices A and B are lower triangular matrices + // + // Form matrix C = A*adj(B) = [ a 0 ] + // [ c d ] + a := a1 * b3 + d := a3 * b1 + c := a2*b3 - a3*b2 + + // The SVD of real 2-by-2 triangular C + // + // [ csl -snl ]*[ a 0 ]*[ csr snr ] = [ r 0 ] + // [ snl csl ] [ c d ] [ -snr csr ] [ 0 t ] + _, _, snr, csr, snl, csl := impl.Dlasv2(a, c, d) + + if math.Abs(csr) >= math.Abs(snr) || math.Abs(csl) >= math.Abs(snl) { + // Compute the [1, 0] and [1, 1] elements of Uᵀ*A and Vᵀ*B, + // and [1, 0] element of |U|ᵀ*|A| and |V|ᵀ*|B|. + + ua21 := -snr*a1 + csr*a2 + ua22r := csr * a3 + + vb21 := -snl*b1 + csl*b2 + vb22r := csl * b3 + + aua21 := math.Abs(snr)*math.Abs(a1) + math.Abs(csr)*math.Abs(a2) + avb21 := math.Abs(snl)*math.Abs(b1) + math.Abs(csl)*math.Abs(b2) + + // Zero [1, 0] elements of Uᵀ*A and Vᵀ*B. 
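+			// The rotation is computed from the row of Uᵀ*A or
+			// Vᵀ*B whose [1, 0] entry carries the smaller relative
+			// rounding-error bound (aua21 and avb21 above), which
+			// gives the more accurate annihilation.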
+ if (math.Abs(ua21) + math.Abs(ua22r)) != 0 { + if aua21/(math.Abs(ua21)+math.Abs(ua22r)) <= avb21/(math.Abs(vb21)+math.Abs(vb22r)) { + csq, snq, _ = impl.Dlartg(ua22r, ua21) + } else { + csq, snq, _ = impl.Dlartg(vb22r, vb21) + } + } else { + csq, snq, _ = impl.Dlartg(vb22r, vb21) + } + + csu = csr + snu = -snr + csv = csl + snv = -snl + } else { + // Compute the [0, 0] and [0, 1] elements of Uᵀ *A and Vᵀ *B, + // and [0, 0] element of |U|ᵀ*|A| and |V|ᵀ*|B|. + + ua11 := csr*a1 + snr*a2 + ua12 := snr * a3 + + vb11 := csl*b1 + snl*b2 + vb12 := snl * b3 + + aua11 := math.Abs(csr)*math.Abs(a1) + math.Abs(snr)*math.Abs(a2) + avb11 := math.Abs(csl)*math.Abs(b1) + math.Abs(snl)*math.Abs(b2) + + // Zero [0, 0] elements of Uᵀ*A and Vᵀ*B, and then swap. + if (math.Abs(ua11) + math.Abs(ua12)) != 0 { + if aua11/(math.Abs(ua11)+math.Abs(ua12)) <= avb11/(math.Abs(vb11)+math.Abs(vb12)) { + csq, snq, _ = impl.Dlartg(ua12, ua11) + } else { + csq, snq, _ = impl.Dlartg(vb12, vb11) + } + } else { + csq, snq, _ = impl.Dlartg(vb12, vb11) + } + + csu = snr + snu = csr + csv = snl + snv = csl + } + } + + return csu, snu, csv, snv, csq, snq +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlagtm.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlagtm.go new file mode 100644 index 0000000000..fc8c8eb403 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlagtm.go @@ -0,0 +1,111 @@ +// Copyright ©2020 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dlagtm performs one of the matrix-matrix operations +// +// C = alpha * A * B + beta * C if trans == blas.NoTrans +// C = alpha * Aᵀ * B + beta * C if trans == blas.Trans or blas.ConjTrans +// +// where A is an m×m tridiagonal matrix represented by its diagonals dl, d, du, +// B and C are m×n dense matrices, and alpha and beta are scalars. 
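+//
+// For m = 3, the tridiagonal matrix A has the layout
+//
+//	[ d[0]  du[0]  0     ]
+//	[ dl[0] d[1]   du[1] ]
+//	[ 0     dl[1]  d[2]  ]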
+func (impl Implementation) Dlagtm(trans blas.Transpose, m, n int, alpha float64, dl, d, du []float64, b []float64, ldb int, beta float64, c []float64, ldc int) { + switch { + case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: + panic(badTrans) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case ldb < max(1, n): + panic(badLdB) + case ldc < max(1, n): + panic(badLdC) + } + + if m == 0 || n == 0 { + return + } + + switch { + case len(dl) < m-1: + panic(shortDL) + case len(d) < m: + panic(shortD) + case len(du) < m-1: + panic(shortDU) + case len(b) < (m-1)*ldb+n: + panic(shortB) + case len(c) < (m-1)*ldc+n: + panic(shortC) + } + + if beta != 1 { + if beta == 0 { + for i := 0; i < m; i++ { + ci := c[i*ldc : i*ldc+n] + for j := range ci { + ci[j] = 0 + } + } + } else { + for i := 0; i < m; i++ { + ci := c[i*ldc : i*ldc+n] + for j := range ci { + ci[j] *= beta + } + } + } + } + + if alpha == 0 { + return + } + + if m == 1 { + if alpha == 1 { + for j := 0; j < n; j++ { + c[j] += d[0] * b[j] + } + } else { + for j := 0; j < n; j++ { + c[j] += alpha * d[0] * b[j] + } + } + return + } + + if trans != blas.NoTrans { + dl, du = du, dl + } + + if alpha == 1 { + for j := 0; j < n; j++ { + c[j] += d[0]*b[j] + du[0]*b[ldb+j] + } + for i := 1; i < m-1; i++ { + for j := 0; j < n; j++ { + c[i*ldc+j] += dl[i-1]*b[(i-1)*ldb+j] + d[i]*b[i*ldb+j] + du[i]*b[(i+1)*ldb+j] + } + } + for j := 0; j < n; j++ { + c[(m-1)*ldc+j] += dl[m-2]*b[(m-2)*ldb+j] + d[m-1]*b[(m-1)*ldb+j] + } + } else { + for j := 0; j < n; j++ { + c[j] += alpha * (d[0]*b[j] + du[0]*b[ldb+j]) + } + for i := 1; i < m-1; i++ { + for j := 0; j < n; j++ { + c[i*ldc+j] += alpha * (dl[i-1]*b[(i-1)*ldb+j] + d[i]*b[i*ldb+j] + du[i]*b[(i+1)*ldb+j]) + } + } + for j := 0; j < n; j++ { + c[(m-1)*ldc+j] += alpha * (dl[m-2]*b[(m-2)*ldb+j] + d[m-1]*b[(m-1)*ldb+j]) + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlahqr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlahqr.go new file mode 100644 index 0000000000..6f1202547e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlahqr.go @@ -0,0 +1,449 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlahqr computes the eigenvalues and Schur factorization of a block of an n×n +// upper Hessenberg matrix H, using the double-shift/single-shift QR algorithm. +// +// h and ldh represent the matrix H. Dlahqr works primarily with the Hessenberg +// submatrix H[ilo:ihi+1,ilo:ihi+1], but applies transformations to all of H if +// wantt is true. It is assumed that H[ihi+1:n,ihi+1:n] is already upper +// quasi-triangular, although this is not checked. +// +// It must hold that +// +// 0 <= ilo <= max(0,ihi), and ihi < n, +// +// and that +// +// H[ilo,ilo-1] == 0, if ilo > 0, +// +// otherwise Dlahqr will panic. +// +// If unconverged is zero on return, wr[ilo:ihi+1] and wi[ilo:ihi+1] will contain +// respectively the real and imaginary parts of the computed eigenvalues ilo +// to ihi. If two eigenvalues are computed as a complex conjugate pair, they are +// stored in consecutive elements of wr and wi, say the i-th and (i+1)th, with +// wi[i] > 0 and wi[i+1] < 0. 
If wantt is true, the eigenvalues are stored in +// the same order as on the diagonal of the Schur form returned in H, with +// wr[i] = H[i,i], and, if H[i:i+2,i:i+2] is a 2×2 diagonal block, +// wi[i] = sqrt(abs(H[i+1,i]*H[i,i+1])) and wi[i+1] = -wi[i]. +// +// wr and wi must have length ihi+1. +// +// z and ldz represent an n×n matrix Z. If wantz is true, the transformations +// will be applied to the submatrix Z[iloz:ihiz+1,ilo:ihi+1] and it must hold that +// +// 0 <= iloz <= ilo, and ihi <= ihiz < n. +// +// If wantz is false, z is not referenced. +// +// unconverged indicates whether Dlahqr computed all the eigenvalues ilo to ihi +// in a total of 30 iterations per eigenvalue. +// +// If unconverged is zero, all the eigenvalues ilo to ihi have been computed and +// will be stored on return in wr[ilo:ihi+1] and wi[ilo:ihi+1]. +// +// If unconverged is zero and wantt is true, H[ilo:ihi+1,ilo:ihi+1] will be +// overwritten on return by upper quasi-triangular full Schur form with any +// 2×2 diagonal blocks in standard form. +// +// If unconverged is zero and if wantt is false, the contents of h on return is +// unspecified. +// +// If unconverged is positive, some eigenvalues have not converged, and +// wr[unconverged:ihi+1] and wi[unconverged:ihi+1] contain those eigenvalues +// which have been successfully computed. +// +// If unconverged is positive and wantt is true, then on return +// +// (initial H)*U = U*(final H), (*) +// +// where U is an orthogonal matrix. The final H is upper Hessenberg and +// H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular. +// +// If unconverged is positive and wantt is false, on return the remaining +// unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix +// H[ilo:unconverged,ilo:unconverged]. +// +// If unconverged is positive and wantz is true, then on return +// +// (final Z) = (initial Z)*U, +// +// where U is the orthogonal matrix in (*) regardless of the value of wantt. +// +// Dlahqr is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlahqr(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int) (unconverged int) { + switch { + case n < 0: + panic(nLT0) + case ilo < 0, max(0, ihi) < ilo: + panic(badIlo) + case ihi >= n: + panic(badIhi) + case ldh < max(1, n): + panic(badLdH) + case wantz && (iloz < 0 || ilo < iloz): + panic(badIloz) + case wantz && (ihiz < ihi || n <= ihiz): + panic(badIhiz) + case ldz < 1, wantz && ldz < n: + panic(badLdZ) + } + + // Quick return if possible. + if n == 0 { + return 0 + } + + switch { + case len(h) < (n-1)*ldh+n: + panic(shortH) + case len(wr) != ihi+1: + panic(shortWr) + case len(wi) != ihi+1: + panic(shortWi) + case wantz && len(z) < (n-1)*ldz+n: + panic(shortZ) + case ilo > 0 && h[ilo*ldh+ilo-1] != 0: + panic(notIsolated) + } + + if ilo == ihi { + wr[ilo] = h[ilo*ldh+ilo] + wi[ilo] = 0 + return 0 + } + + // Clear out the trash. + for j := ilo; j < ihi-2; j++ { + h[(j+2)*ldh+j] = 0 + h[(j+3)*ldh+j] = 0 + } + if ilo <= ihi-2 { + h[ihi*ldh+ihi-2] = 0 + } + + nh := ihi - ilo + 1 + nz := ihiz - iloz + 1 + + // Set machine-dependent constants for the stopping criterion. + ulp := dlamchP + smlnum := float64(nh) / ulp * dlamchS + + // i1 and i2 are the indices of the first row and last column of H to + // which transformations must be applied. If eigenvalues only are being + // computed, i1 and i2 are set inside the main loop. 
+ var i1, i2 int + if wantt { + i1 = 0 + i2 = n - 1 + } + + itmax := 30 * max(10, nh) // Total number of QR iterations allowed. + + // kdefl counts the number of iterations since a deflation. + kdefl := 0 + + // The main loop begins here. i is the loop index and decreases from ihi + // to ilo in steps of 1 or 2. Each iteration of the loop works with the + // active submatrix in rows and columns l to i. Eigenvalues i+1 to ihi + // have already converged. Either l = ilo or H[l,l-1] is negligible so + // that the matrix splits. + bi := blas64.Implementation() + i := ihi + for i >= ilo { + l := ilo + + // Perform QR iterations on rows and columns ilo to i until a + // submatrix of order 1 or 2 splits off at the bottom because a + // subdiagonal element has become negligible. + converged := false + for its := 0; its <= itmax; its++ { + // Look for a single small subdiagonal element. + var k int + for k = i; k > l; k-- { + if math.Abs(h[k*ldh+k-1]) <= smlnum { + break + } + tst := math.Abs(h[(k-1)*ldh+k-1]) + math.Abs(h[k*ldh+k]) + if tst == 0 { + if k-2 >= ilo { + tst += math.Abs(h[(k-1)*ldh+k-2]) + } + if k+1 <= ihi { + tst += math.Abs(h[(k+1)*ldh+k]) + } + } + // The following is a conservative small + // subdiagonal deflation criterion due to Ahues + // & Tisseur (LAWN 122, 1997). It has better + // mathematical foundation and improves accuracy + // in some cases. + if math.Abs(h[k*ldh+k-1]) <= ulp*tst { + ab := math.Max(math.Abs(h[k*ldh+k-1]), math.Abs(h[(k-1)*ldh+k])) + ba := math.Min(math.Abs(h[k*ldh+k-1]), math.Abs(h[(k-1)*ldh+k])) + aa := math.Max(math.Abs(h[k*ldh+k]), math.Abs(h[(k-1)*ldh+k-1]-h[k*ldh+k])) + bb := math.Min(math.Abs(h[k*ldh+k]), math.Abs(h[(k-1)*ldh+k-1]-h[k*ldh+k])) + s := aa + ab + if ab/s*ba <= math.Max(smlnum, aa/s*bb*ulp) { + break + } + } + } + l = k + if l > ilo { + // H[l,l-1] is negligible. + h[l*ldh+l-1] = 0 + } + if l >= i-1 { + // Break the loop because a submatrix of order 1 + // or 2 has split off. + converged = true + break + } + kdefl++ + + // Now the active submatrix is in rows and columns l to + // i. If eigenvalues only are being computed, only the + // active submatrix need be transformed. + if !wantt { + i1 = l + i2 = i + } + + const ( + dat1 = 0.75 + dat2 = -0.4375 + kexsh = 10 + ) + var h11, h21, h12, h22 float64 + switch { + case kdefl%(2*kexsh) == 0: // Exceptional shift. + s := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2]) + h11 = dat1*s + h[i*ldh+i] + h12 = dat2 * s + h21 = s + h22 = h11 + case kdefl%kexsh == 0: // Exceptional shift. + s := math.Abs(h[(l+1)*ldh+l]) + math.Abs(h[(l+2)*ldh+l+1]) + h11 = dat1*s + h[l*ldh+l] + h12 = dat2 * s + h21 = s + h22 = h11 + default: // Prepare to use Francis' double shift (i.e., + // 2nd degree generalized Rayleigh quotient). + h11 = h[(i-1)*ldh+i-1] + h21 = h[i*ldh+i-1] + h12 = h[(i-1)*ldh+i] + h22 = h[i*ldh+i] + } + s := math.Abs(h11) + math.Abs(h12) + math.Abs(h21) + math.Abs(h22) + var ( + rt1r, rt1i float64 + rt2r, rt2i float64 + ) + if s != 0 { + h11 /= s + h21 /= s + h12 /= s + h22 /= s + tr := (h11 + h22) / 2 + det := (h11-tr)*(h22-tr) - h12*h21 + rtdisc := math.Sqrt(math.Abs(det)) + if det >= 0 { + // Complex conjugate shifts. + rt1r = tr * s + rt2r = rt1r + rt1i = rtdisc * s + rt2i = -rt1i + } else { + // Real shifts (use only one of them). + rt1r = tr + rtdisc + rt2r = tr - rtdisc + if math.Abs(rt1r-h22) <= math.Abs(rt2r-h22) { + rt1r *= s + rt2r = rt1r + } else { + rt2r *= s + rt1r = rt2r + } + rt1i = 0 + rt2i = 0 + } + } + + // Look for two consecutive small subdiagonal elements. 
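+			// The loop below determines the row m, closest to the
+			// bottom of the active submatrix, at which starting the
+			// double-shift QR sweep would make H[m,m-1] negligible,
+			// and builds in v the first column of the shifted
+			// polynomial that seeds the bulge.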
+ var m int + var v [3]float64 + for m = i - 2; m >= l; m-- { + // Determine the effect of starting the + // double-shift QR iteration at row m, and see + // if this would make H[m,m-1] negligible. The + // following uses scaling to avoid overflows and + // most underflows. + h21s := h[(m+1)*ldh+m] + s := math.Abs(h[m*ldh+m]-rt2r) + math.Abs(rt2i) + math.Abs(h21s) + h21s /= s + v[0] = h21s*h[m*ldh+m+1] + (h[m*ldh+m]-rt1r)*((h[m*ldh+m]-rt2r)/s) - rt2i/s*rt1i + v[1] = h21s * (h[m*ldh+m] + h[(m+1)*ldh+m+1] - rt1r - rt2r) + v[2] = h21s * h[(m+2)*ldh+m+1] + s = math.Abs(v[0]) + math.Abs(v[1]) + math.Abs(v[2]) + v[0] /= s + v[1] /= s + v[2] /= s + if m == l { + break + } + dsum := math.Abs(h[(m-1)*ldh+m-1]) + math.Abs(h[m*ldh+m]) + math.Abs(h[(m+1)*ldh+m+1]) + if math.Abs(h[m*ldh+m-1])*(math.Abs(v[1])+math.Abs(v[2])) <= ulp*math.Abs(v[0])*dsum { + break + } + } + + // Double-shift QR step. + for k := m; k < i; k++ { + // The first iteration of this loop determines a + // reflection G from the vector V and applies it + // from left and right to H, thus creating a + // non-zero bulge below the subdiagonal. + // + // Each subsequent iteration determines a + // reflection G to restore the Hessenberg form + // in the (k-1)th column, and thus chases the + // bulge one step toward the bottom of the + // active submatrix. nr is the order of G. + + nr := min(3, i-k+1) + if k > m { + bi.Dcopy(nr, h[k*ldh+k-1:], ldh, v[:], 1) + } + var t0 float64 + v[0], t0 = impl.Dlarfg(nr, v[0], v[1:], 1) + if k > m { + h[k*ldh+k-1] = v[0] + h[(k+1)*ldh+k-1] = 0 + if k < i-1 { + h[(k+2)*ldh+k-1] = 0 + } + } else if m > l { + // Use the following instead of H[k,k-1] = -H[k,k-1] + // to avoid a bug when v[1] and v[2] underflow. + h[k*ldh+k-1] *= 1 - t0 + } + t1 := t0 * v[1] + if nr == 3 { + t2 := t0 * v[2] + + // Apply G from the left to transform + // the rows of the matrix in columns k + // to i2. + for j := k; j <= i2; j++ { + sum := h[k*ldh+j] + v[1]*h[(k+1)*ldh+j] + v[2]*h[(k+2)*ldh+j] + h[k*ldh+j] -= sum * t0 + h[(k+1)*ldh+j] -= sum * t1 + h[(k+2)*ldh+j] -= sum * t2 + } + + // Apply G from the right to transform + // the columns of the matrix in rows i1 + // to min(k+3,i). + for j := i1; j <= min(k+3, i); j++ { + sum := h[j*ldh+k] + v[1]*h[j*ldh+k+1] + v[2]*h[j*ldh+k+2] + h[j*ldh+k] -= sum * t0 + h[j*ldh+k+1] -= sum * t1 + h[j*ldh+k+2] -= sum * t2 + } + + if wantz { + // Accumulate transformations in the matrix Z. + for j := iloz; j <= ihiz; j++ { + sum := z[j*ldz+k] + v[1]*z[j*ldz+k+1] + v[2]*z[j*ldz+k+2] + z[j*ldz+k] -= sum * t0 + z[j*ldz+k+1] -= sum * t1 + z[j*ldz+k+2] -= sum * t2 + } + } + } else if nr == 2 { + // Apply G from the left to transform + // the rows of the matrix in columns k + // to i2. + for j := k; j <= i2; j++ { + sum := h[k*ldh+j] + v[1]*h[(k+1)*ldh+j] + h[k*ldh+j] -= sum * t0 + h[(k+1)*ldh+j] -= sum * t1 + } + + // Apply G from the right to transform + // the columns of the matrix in rows i1 + // to min(k+3,i). + for j := i1; j <= i; j++ { + sum := h[j*ldh+k] + v[1]*h[j*ldh+k+1] + h[j*ldh+k] -= sum * t0 + h[j*ldh+k+1] -= sum * t1 + } + + if wantz { + // Accumulate transformations in the matrix Z. + for j := iloz; j <= ihiz; j++ { + sum := z[j*ldz+k] + v[1]*z[j*ldz+k+1] + z[j*ldz+k] -= sum * t0 + z[j*ldz+k+1] -= sum * t1 + } + } + } + } + } + + if !converged { + // The QR iteration finished without splitting off a + // submatrix of order 1 or 2. + return i + 1 + } + + if l == i { + // H[i,i-1] is negligible: one eigenvalue has converged. 
+ wr[i] = h[i*ldh+i] + wi[i] = 0 + } else if l == i-1 { + // H[i-1,i-2] is negligible: a pair of eigenvalues have converged. + + // Transform the 2×2 submatrix to standard Schur form, + // and compute and store the eigenvalues. + var cs, sn float64 + a, b := h[(i-1)*ldh+i-1], h[(i-1)*ldh+i] + c, d := h[i*ldh+i-1], h[i*ldh+i] + a, b, c, d, wr[i-1], wi[i-1], wr[i], wi[i], cs, sn = impl.Dlanv2(a, b, c, d) + h[(i-1)*ldh+i-1], h[(i-1)*ldh+i] = a, b + h[i*ldh+i-1], h[i*ldh+i] = c, d + + if wantt { + // Apply the transformation to the rest of H. + if i2 > i { + bi.Drot(i2-i, h[(i-1)*ldh+i+1:], 1, h[i*ldh+i+1:], 1, cs, sn) + } + bi.Drot(i-i1-1, h[i1*ldh+i-1:], ldh, h[i1*ldh+i:], ldh, cs, sn) + } + + if wantz { + // Apply the transformation to Z. + bi.Drot(nz, z[iloz*ldz+i-1:], ldz, z[iloz*ldz+i:], ldz, cs, sn) + } + } + + // Reset deflation counter. + kdefl = 0 + + // Return to start of the main loop with new value of i. + i = l - 1 + } + return 0 +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlahr2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlahr2.go new file mode 100644 index 0000000000..5921473342 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlahr2.go @@ -0,0 +1,202 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlahr2 reduces the first nb columns of a real general n×(n-k+1) matrix A so +// that elements below the k-th subdiagonal are zero. The reduction is performed +// by an orthogonal similarity transformation Qᵀ * A * Q. Dlahr2 returns the +// matrices V and T which determine Q as a block reflector I - V*T*Vᵀ, and +// also the matrix Y = A * V * T. +// +// The matrix Q is represented as a product of nb elementary reflectors +// +// Q = H_0 * H_1 * ... * H_{nb-1}. +// +// Each H_i has the form +// +// H_i = I - tau[i] * v * vᵀ, +// +// where v is a real vector with v[0:i+k-1] = 0 and v[i+k-1] = 1. v[i+k:n] is +// stored on exit in A[i+k+1:n,i]. +// +// The elements of the vectors v together form the (n-k+1)×nb matrix +// V which is needed, with T and Y, to apply the transformation to the +// unreduced part of the matrix, using an update of the form +// +// A = (I - V*T*Vᵀ) * (A - Y*Vᵀ). +// +// On entry, a contains the n×(n-k+1) general matrix A. On return, the elements +// on and above the k-th subdiagonal in the first nb columns are overwritten +// with the corresponding elements of the reduced matrix; the elements below the +// k-th subdiagonal, with the slice tau, represent the matrix Q as a product of +// elementary reflectors. The other columns of A are unchanged. +// +// The contents of A on exit are illustrated by the following example +// with n = 7, k = 3 and nb = 2: +// +// [ a a a a a ] +// [ a a a a a ] +// [ a a a a a ] +// [ h h a a a ] +// [ v0 h a a a ] +// [ v0 v1 a a a ] +// [ v0 v1 a a a ] +// +// where a denotes an element of the original matrix A, h denotes a +// modified element of the upper Hessenberg matrix H, and vi denotes an +// element of the vector defining H_i. +// +// k is the offset for the reduction. Elements below the k-th subdiagonal in the +// first nb columns are reduced to zero. +// +// nb is the number of columns to be reduced. +// +// On entry, a represents the n×(n-k+1) matrix A. 
On return, the elements on and +// above the k-th subdiagonal in the first nb columns are overwritten with the +// corresponding elements of the reduced matrix. The elements below the k-th +// subdiagonal, with the slice tau, represent the matrix Q as a product of +// elementary reflectors. The other columns of A are unchanged. +// +// tau will contain the scalar factors of the elementary reflectors. It must +// have length at least nb. +// +// t and ldt represent the nb×nb upper triangular matrix T, and y and ldy +// represent the n×nb matrix Y. +// +// Dlahr2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlahr2(n, k, nb int, a []float64, lda int, tau, t []float64, ldt int, y []float64, ldy int) { + switch { + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case nb < 0: + panic(nbLT0) + case nb > n: + panic(nbGTN) + case lda < max(1, n-k+1): + panic(badLdA) + case ldt < max(1, nb): + panic(badLdT) + case ldy < max(1, nb): + panic(badLdY) + } + + // Quick return if possible. + if n < 0 { + return + } + + switch { + case len(a) < (n-1)*lda+n-k+1: + panic(shortA) + case len(tau) < nb: + panic(shortTau) + case len(t) < (nb-1)*ldt+nb: + panic(shortT) + case len(y) < (n-1)*ldy+nb: + panic(shortY) + } + + // Quick return if possible. + if n == 1 { + return + } + + bi := blas64.Implementation() + var ei float64 + for i := 0; i < nb; i++ { + if i > 0 { + // Update A[k:n,i]. + + // Update i-th column of A - Y * Vᵀ. + bi.Dgemv(blas.NoTrans, n-k, i, + -1, y[k*ldy:], ldy, + a[(k+i-1)*lda:], 1, + 1, a[k*lda+i:], lda) + + // Apply I - V * Tᵀ * Vᵀ to this column (call it b) + // from the left, using the last column of T as + // workspace. + // Let V = [ V1 ] and b = [ b1 ] (first i rows) + // [ V2 ] [ b2 ] + // where V1 is unit lower triangular. + // + // w := V1ᵀ * b1. + bi.Dcopy(i, a[k*lda+i:], lda, t[nb-1:], ldt) + bi.Dtrmv(blas.Lower, blas.Trans, blas.Unit, i, + a[k*lda:], lda, t[nb-1:], ldt) + + // w := w + V2ᵀ * b2. + bi.Dgemv(blas.Trans, n-k-i, i, + 1, a[(k+i)*lda:], lda, + a[(k+i)*lda+i:], lda, + 1, t[nb-1:], ldt) + + // w := Tᵀ * w. + bi.Dtrmv(blas.Upper, blas.Trans, blas.NonUnit, i, + t, ldt, t[nb-1:], ldt) + + // b2 := b2 - V2*w. + bi.Dgemv(blas.NoTrans, n-k-i, i, + -1, a[(k+i)*lda:], lda, + t[nb-1:], ldt, + 1, a[(k+i)*lda+i:], lda) + + // b1 := b1 - V1*w. + bi.Dtrmv(blas.Lower, blas.NoTrans, blas.Unit, i, + a[k*lda:], lda, t[nb-1:], ldt) + bi.Daxpy(i, -1, t[nb-1:], ldt, a[k*lda+i:], lda) + + a[(k+i-1)*lda+i-1] = ei + } + + // Generate the elementary reflector H_i to annihilate + // A[k+i+1:n,i]. + ei, tau[i] = impl.Dlarfg(n-k-i, a[(k+i)*lda+i], a[min(k+i+1, n-1)*lda+i:], lda) + a[(k+i)*lda+i] = 1 + + // Compute Y[k:n,i]. + bi.Dgemv(blas.NoTrans, n-k, n-k-i, + 1, a[k*lda+i+1:], lda, + a[(k+i)*lda+i:], lda, + 0, y[k*ldy+i:], ldy) + bi.Dgemv(blas.Trans, n-k-i, i, + 1, a[(k+i)*lda:], lda, + a[(k+i)*lda+i:], lda, + 0, t[i:], ldt) + bi.Dgemv(blas.NoTrans, n-k, i, + -1, y[k*ldy:], ldy, + t[i:], ldt, + 1, y[k*ldy+i:], ldy) + bi.Dscal(n-k, tau[i], y[k*ldy+i:], ldy) + + // Compute T[0:i,i]. + bi.Dscal(i, -tau[i], t[i:], ldt) + bi.Dtrmv(blas.Upper, blas.NoTrans, blas.NonUnit, i, + t, ldt, t[i:], ldt) + + t[i*ldt+i] = tau[i] + } + a[(k+nb-1)*lda+nb-1] = ei + + // Compute Y[0:k,0:nb]. 
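+	// Y[0:k] = (A[0:k,1:nb+1]*V1 + A[0:k,nb+1:n-k+1]*V2) * T, where V1 is the
+	// unit lower triangular top block of V and V2 holds the remaining rows;
+	// the two Dtrmm calls apply V1 and T in place.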
+ impl.Dlacpy(blas.All, k, nb, a[1:], lda, y, ldy) + bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, k, nb, + 1, a[k*lda:], lda, y, ldy) + if n > k+nb { + bi.Dgemm(blas.NoTrans, blas.NoTrans, k, nb, n-k-nb, + 1, a[1+nb:], lda, + a[(k+nb)*lda:], lda, + 1, y, ldy) + } + bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.NonUnit, k, nb, + 1, t, ldt, y, ldy) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaln2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaln2.go new file mode 100644 index 0000000000..54d443988b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaln2.go @@ -0,0 +1,407 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlaln2 solves a linear equation or a system of 2 linear equations of the form +// +// (ca A - w D) X = scale B if trans == false, +// (ca Aᵀ - w D) X = scale B if trans == true, +// +// where A is a na×na real matrix, ca is a real scalar, D is a na×na diagonal +// real matrix, w is a scalar, real if nw == 1, complex if nw == 2, and X and B +// are na×1 matrices, real if w is real, complex if w is complex. +// +// If w is complex, X and B are represented as na×2 matrices, the first column +// of each being the real part and the second being the imaginary part. +// +// na and nw must be 1 or 2, otherwise Dlaln2 will panic. +// +// d1 and d2 are the diagonal elements of D. d2 is not used if na == 1. +// +// wr and wi represent the real and imaginary part, respectively, of the scalar +// w. wi is not used if nw == 1. +// +// smin is the desired lower bound on the singular values of A. This should be +// a safe distance away from underflow or overflow, say, between +// (underflow/machine precision) and (overflow*machine precision). +// +// If both singular values of (ca A - w D) are less than smin, smin*identity +// will be used instead of (ca A - w D). If only one singular value is less than +// smin, one element of (ca A - w D) will be perturbed enough to make the +// smallest singular value roughly smin. If both singular values are at least +// smin, (ca A - w D) will not be perturbed. In any case, the perturbation will +// be at most some small multiple of max(smin, ulp*norm(ca A - w D)). The +// singular values are computed by infinity-norm approximations, and thus will +// only be correct to a factor of 2 or so. +// +// All input quantities are assumed to be smaller than overflow by a reasonable +// factor. +// +// scale is a scaling factor less than or equal to 1 which is chosen so that X +// can be computed without overflow. X is further scaled if necessary to assure +// that norm(ca A - w D)*norm(X) is less than overflow. +// +// xnorm contains the infinity-norm of X when X is regarded as a na×nw real +// matrix. +// +// ok will be false if (ca A - w D) had to be perturbed to make its smallest +// singular value greater than smin, otherwise ok will be true. +// +// Dlaln2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlaln2(trans bool, na, nw int, smin, ca float64, a []float64, lda int, d1, d2 float64, b []float64, ldb int, wr, wi float64, x []float64, ldx int) (scale, xnorm float64, ok bool) { + // TODO(vladimir-ch): Consider splitting this function into two, one + // handling the real case (nw == 1) and the other handling the complex + // case (nw == 2). 
Given that Go has complex types, their signatures + // would be simpler and more natural, and the implementation not as + // convoluted. + + switch { + case na != 1 && na != 2: + panic(badNa) + case nw != 1 && nw != 2: + panic(badNw) + case lda < na: + panic(badLdA) + case len(a) < (na-1)*lda+na: + panic(shortA) + case ldb < nw: + panic(badLdB) + case len(b) < (na-1)*ldb+nw: + panic(shortB) + case ldx < nw: + panic(badLdX) + case len(x) < (na-1)*ldx+nw: + panic(shortX) + } + + smlnum := 2 * dlamchS + bignum := 1 / smlnum + smini := math.Max(smin, smlnum) + + ok = true + scale = 1 + + if na == 1 { + // 1×1 (i.e., scalar) system C X = B. + + if nw == 1 { + // Real 1×1 system. + + // C = ca A - w D. + csr := ca*a[0] - wr*d1 + cnorm := math.Abs(csr) + + // If |C| < smini, use C = smini. + if cnorm < smini { + csr = smini + cnorm = smini + ok = false + } + + // Check scaling for X = B / C. + bnorm := math.Abs(b[0]) + if cnorm < 1 && bnorm > math.Max(1, bignum*cnorm) { + scale = 1 / bnorm + } + + // Compute X. + x[0] = b[0] * scale / csr + xnorm = math.Abs(x[0]) + + return scale, xnorm, ok + } + + // Complex 1×1 system (w is complex). + + // C = ca A - w D. + csr := ca*a[0] - wr*d1 + csi := -wi * d1 + cnorm := math.Abs(csr) + math.Abs(csi) + + // If |C| < smini, use C = smini. + if cnorm < smini { + csr = smini + csi = 0 + cnorm = smini + ok = false + } + + // Check scaling for X = B / C. + bnorm := math.Abs(b[0]) + math.Abs(b[1]) + if cnorm < 1 && bnorm > math.Max(1, bignum*cnorm) { + scale = 1 / bnorm + } + + // Compute X. + cx := complex(scale*b[0], scale*b[1]) / complex(csr, csi) + x[0], x[1] = real(cx), imag(cx) + xnorm = math.Abs(x[0]) + math.Abs(x[1]) + + return scale, xnorm, ok + } + + // 2×2 system. + + // Compute the real part of + // C = ca A - w D + // or + // C = ca Aᵀ - w D. + crv := [4]float64{ + ca*a[0] - wr*d1, + ca * a[1], + ca * a[lda], + ca*a[lda+1] - wr*d2, + } + if trans { + crv[1] = ca * a[lda] + crv[2] = ca * a[1] + } + + pivot := [4][4]int{ + {0, 1, 2, 3}, + {1, 0, 3, 2}, + {2, 3, 0, 1}, + {3, 2, 1, 0}, + } + + if nw == 1 { + // Real 2×2 system (w is real). + + // Find the largest element in C. + var cmax float64 + var icmax int + for j, v := range crv { + v = math.Abs(v) + if v > cmax { + cmax = v + icmax = j + } + } + + // If norm(C) < smini, use smini*identity. + if cmax < smini { + bnorm := math.Max(math.Abs(b[0]), math.Abs(b[ldb])) + if smini < 1 && bnorm > math.Max(1, bignum*smini) { + scale = 1 / bnorm + } + temp := scale / smini + x[0] = temp * b[0] + x[ldx] = temp * b[ldb] + xnorm = temp * bnorm + ok = false + + return scale, xnorm, ok + } + + // Gaussian elimination with complete pivoting. + // Form upper triangular matrix + // [ur11 ur12] + // [ 0 ur22] + ur11 := crv[icmax] + ur12 := crv[pivot[icmax][1]] + cr21 := crv[pivot[icmax][2]] + cr22 := crv[pivot[icmax][3]] + ur11r := 1 / ur11 + lr21 := ur11r * cr21 + ur22 := cr22 - ur12*lr21 + + // If smaller pivot < smini, use smini. + if math.Abs(ur22) < smini { + ur22 = smini + ok = false + } + + var br1, br2 float64 + if icmax > 1 { + // If the pivot lies in the second row, swap the rows. + br1 = b[ldb] + br2 = b[0] + } else { + br1 = b[0] + br2 = b[ldb] + } + br2 -= lr21 * br1 // Apply the Gaussian elimination step to the right-hand side. + + bbnd := math.Max(math.Abs(ur22*ur11r*br1), math.Abs(br2)) + if bbnd > 1 && math.Abs(ur22) < 1 && bbnd >= bignum*math.Abs(ur22) { + scale = 1 / bbnd + } + + // Solve the linear system ur*xr=br. 
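+		// Back substitution on the pivoted upper triangular system
+		//
+		//	[ur11 ur12] [xr1]           [br1]
+		//	[   0 ur22] [xr2] = scale * [br2]
+		//
+		// gives xr2 from the second row and then xr1 from the first.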
+		xr2 := br2 * scale / ur22
+		xr1 := scale*br1*ur11r - ur11r*ur12*xr2
+		if icmax&0x1 != 0 {
+			// If the pivot lies in the second column, swap the components of the solution.
+			x[0] = xr2
+			x[ldx] = xr1
+		} else {
+			x[0] = xr1
+			x[ldx] = xr2
+		}
+		xnorm = math.Max(math.Abs(xr1), math.Abs(xr2))
+
+		// Further scaling if norm(A)*norm(X) > overflow.
+		if xnorm > 1 && cmax > 1 && xnorm > bignum/cmax {
+			temp := cmax / bignum
+			x[0] *= temp
+			x[ldx] *= temp
+			xnorm *= temp
+			scale *= temp
+		}
+
+		return scale, xnorm, ok
+	}
+
+	// Complex 2×2 system (w is complex).
+
+	// Find the largest element in C.
+	civ := [4]float64{
+		-wi * d1,
+		0,
+		0,
+		-wi * d2,
+	}
+	var cmax float64
+	var icmax int
+	for j, v := range crv {
+		v := math.Abs(v)
+		if v+math.Abs(civ[j]) > cmax {
+			cmax = v + math.Abs(civ[j])
+			icmax = j
+		}
+	}
+
+	// If norm(C) < smini, use smini*identity.
+	if cmax < smini {
+		br1 := math.Abs(b[0]) + math.Abs(b[1])
+		br2 := math.Abs(b[ldb]) + math.Abs(b[ldb+1])
+		bnorm := math.Max(br1, br2)
+		if smini < 1 && bnorm > 1 && bnorm > bignum*smini {
+			scale = 1 / bnorm
+		}
+		temp := scale / smini
+		x[0] = temp * b[0]
+		x[1] = temp * b[1]
+		x[ldx] = temp * b[ldb]
+		x[ldx+1] = temp * b[ldb+1]
+		xnorm = temp * bnorm
+		ok = false
+
+		return scale, xnorm, ok
+	}
+
+	// Gaussian elimination with complete pivoting.
+	ur11 := crv[icmax]
+	ui11 := civ[icmax]
+	ur12 := crv[pivot[icmax][1]]
+	ui12 := civ[pivot[icmax][1]]
+	cr21 := crv[pivot[icmax][2]]
+	ci21 := civ[pivot[icmax][2]]
+	cr22 := crv[pivot[icmax][3]]
+	ci22 := civ[pivot[icmax][3]]
+	var (
+		ur11r, ui11r float64
+		lr21, li21   float64
+		ur12s, ui12s float64
+		ur22, ui22   float64
+	)
+	if icmax == 0 || icmax == 3 {
+		// Off-diagonals of pivoted C are real.
+		if math.Abs(ur11) > math.Abs(ui11) {
+			temp := ui11 / ur11
+			ur11r = 1 / (ur11 * (1 + temp*temp))
+			ui11r = -temp * ur11r
+		} else {
+			temp := ur11 / ui11
+			ui11r = -1 / (ui11 * (1 + temp*temp))
+			ur11r = -temp * ui11r
+		}
+		lr21 = cr21 * ur11r
+		li21 = cr21 * ui11r
+		ur12s = ur12 * ur11r
+		ui12s = ur12 * ui11r
+		ur22 = cr22 - ur12*lr21
+		ui22 = ci22 - ur12*li21
+	} else {
+		// Diagonals of pivoted C are real.
+		ur11r = 1 / ur11
+		// ui11r is already 0.
+		lr21 = cr21 * ur11r
+		li21 = ci21 * ur11r
+		ur12s = ur12 * ur11r
+		ui12s = ui12 * ur11r
+		ur22 = cr22 - ur12*lr21 + ui12*li21
+		ui22 = -ur12*li21 - ui12*lr21
+	}
+	u22abs := math.Abs(ur22) + math.Abs(ui22)
+
+	// If smaller pivot < smini, use smini.
+	if u22abs < smini {
+		ur22 = smini
+		ui22 = 0
+		ok = false
+	}
+
+	var br1, bi1 float64
+	var br2, bi2 float64
+	if icmax > 1 {
+		// If the pivot lies in the second row, swap the rows.
+		br1 = b[ldb]
+		bi1 = b[ldb+1]
+		br2 = b[0]
+		bi2 = b[1]
+	} else {
+		br1 = b[0]
+		bi1 = b[1]
+		br2 = b[ldb]
+		bi2 = b[ldb+1]
+	}
+	br2 += -lr21*br1 + li21*bi1
+	bi2 += -li21*br1 - lr21*bi1
+
+	bbnd1 := u22abs * (math.Abs(ur11r) + math.Abs(ui11r)) * (math.Abs(br1) + math.Abs(bi1))
+	bbnd2 := math.Abs(br2) + math.Abs(bi2)
+	bbnd := math.Max(bbnd1, bbnd2)
+	if bbnd > 1 && u22abs < 1 && bbnd >= bignum*u22abs {
+		scale = 1 / bbnd
+		br1 *= scale
+		bi1 *= scale
+		br2 *= scale
+		bi2 *= scale
+	}
+
+	cx2 := complex(br2, bi2) / complex(ur22, ui22)
+	xr2, xi2 := real(cx2), imag(cx2)
+	xr1 := ur11r*br1 - ui11r*bi1 - ur12s*xr2 + ui12s*xi2
+	xi1 := ui11r*br1 + ur11r*bi1 - ui12s*xr2 - ur12s*xi2
+	if icmax&0x1 != 0 {
+		// If the pivot lies in the second column, swap the components of the solution.
+ x[0] = xr2 + x[1] = xi2 + x[ldx] = xr1 + x[ldx+1] = xi1 + } else { + x[0] = xr1 + x[1] = xi1 + x[ldx] = xr2 + x[ldx+1] = xi2 + } + xnorm = math.Max(math.Abs(xr1)+math.Abs(xi1), math.Abs(xr2)+math.Abs(xi2)) + + // Further scaling if norm(A)*norm(X) > overflow. + if xnorm > 1 && cmax > 1 && xnorm > bignum/cmax { + temp := cmax / bignum + x[0] *= temp + x[1] *= temp + x[ldx] *= temp + x[ldx+1] *= temp + xnorm *= temp + scale *= temp + } + + return scale, xnorm, ok +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlangb.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlangb.go new file mode 100644 index 0000000000..4b7b449f63 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlangb.go @@ -0,0 +1,87 @@ +// Copyright ©2021 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/internal/asm/f64" + "gonum.org/v1/gonum/lapack" +) + +// Dlangb returns the given norm of an m×n band matrix with kl sub-diagonals and +// ku super-diagonals. +func (impl Implementation) Dlangb(norm lapack.MatrixNorm, m, n, kl, ku int, ab []float64, ldab int) float64 { + ncol := kl + 1 + ku + switch { + case norm != lapack.MaxAbs && norm != lapack.MaxRowSum && norm != lapack.MaxColumnSum && norm != lapack.Frobenius: + panic(badNorm) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case kl < 0: + panic(klLT0) + case ku < 0: + panic(kuLT0) + case ldab < ncol: + panic(badLdA) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return 0 + } + + switch { + case len(ab) < min(m, n+kl)*ldab: + panic(shortAB) + } + + var value float64 + switch norm { + case lapack.MaxAbs: + for i := 0; i < min(m, n+kl); i++ { + l := max(0, kl-i) + u := min(n+kl-i, ncol) + for _, aij := range ab[i*ldab+l : i*ldab+u] { + aij = math.Abs(aij) + if aij > value || math.IsNaN(aij) { + value = aij + } + } + } + case lapack.MaxRowSum: + for i := 0; i < min(m, n+kl); i++ { + l := max(0, kl-i) + u := min(n+kl-i, ncol) + sum := f64.L1Norm(ab[i*ldab+l : i*ldab+u]) + if sum > value || math.IsNaN(sum) { + value = sum + } + } + case lapack.MaxColumnSum: + for j := 0; j < min(m+ku, n); j++ { + jb := min(kl+j, ncol-1) + ib := max(0, j-ku) + jlen := min(j+kl, m-1) - ib + 1 + sum := f64.L1NormInc(ab[ib*ldab+jb:], jlen, max(1, ldab-1)) + if sum > value || math.IsNaN(sum) { + value = sum + } + } + case lapack.Frobenius: + scale := 0.0 + sum := 1.0 + for i := 0; i < min(m, n+kl); i++ { + l := max(0, kl-i) + u := min(n+kl-i, ncol) + ilen := u - l + scale, sum = impl.Dlassq(ilen, ab[i*ldab+l:], 1, scale, sum) + } + value = scale * math.Sqrt(sum) + } + return value +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlange.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlange.go new file mode 100644 index 0000000000..3a00dce1da --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlange.go @@ -0,0 +1,89 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/lapack" +) + +// Dlange returns the value of the specified norm of a general m×n matrix A: +// +// lapack.MaxAbs: the maximum absolute value of any element. +// lapack.MaxColumnSum: the maximum column sum of the absolute values of the elements (1-norm). +// lapack.MaxRowSum: the maximum row sum of the absolute values of the elements (infinity-norm). 
+//	lapack.Frobenius: the square root of the sum of the squares of the elements (Frobenius norm).
+//
+// If norm == lapack.MaxColumnSum, work must be of length n, and this function will
+// panic otherwise. There are no restrictions on work for the other matrix norms.
func (impl Implementation) Dlange(norm lapack.MatrixNorm, m, n int, a []float64, lda int, work []float64) float64 {
+	// TODO(btracey): These should probably be refactored to use BLAS calls.
+	switch {
+	case norm != lapack.MaxRowSum && norm != lapack.MaxColumnSum && norm != lapack.Frobenius && norm != lapack.MaxAbs:
+		panic(badNorm)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if m == 0 || n == 0 {
+		return 0
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case norm == lapack.MaxColumnSum && len(work) < n:
+		panic(shortWork)
+	}
+
+	switch norm {
+	case lapack.MaxAbs:
+		var value float64
+		for i := 0; i < m; i++ {
+			for j := 0; j < n; j++ {
+				value = math.Max(value, math.Abs(a[i*lda+j]))
+			}
+		}
+		return value
+	case lapack.MaxColumnSum:
+		for i := 0; i < n; i++ {
+			work[i] = 0
+		}
+		for i := 0; i < m; i++ {
+			for j := 0; j < n; j++ {
+				work[j] += math.Abs(a[i*lda+j])
+			}
+		}
+		var value float64
+		for i := 0; i < n; i++ {
+			value = math.Max(value, work[i])
+		}
+		return value
+	case lapack.MaxRowSum:
+		var value float64
+		for i := 0; i < m; i++ {
+			var sum float64
+			for j := 0; j < n; j++ {
+				sum += math.Abs(a[i*lda+j])
+			}
+			value = math.Max(value, sum)
+		}
+		return value
+	default:
+		// lapack.Frobenius
+		scale := 0.0
+		sum := 1.0
+		for i := 0; i < m; i++ {
+			scale, sum = impl.Dlassq(n, a[i*lda:], 1, scale, sum)
+		}
+		return scale * math.Sqrt(sum)
+	}
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlangt.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlangt.go
new file mode 100644
index 0000000000..cd1c49b5c3
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlangt.go
@@ -0,0 +1,115 @@
+// Copyright ©2020 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlangt returns the value of the given norm of an n×n tridiagonal matrix
+// represented by the three diagonals.
+//
+// d must have length at least n and dl and du must have length at least n-1.
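+//
+// A minimal usage sketch (illustrative only; assumes an Implementation
+// value named impl):
+//
+//	// | 4 3 0 |
+//	// | 1 5 7 |  with dl = [1 2], d = [4 5 6], du = [3 7].
+//	// | 0 2 6 |
+//	dl := []float64{1, 2}
+//	d := []float64{4, 5, 6}
+//	du := []float64{3, 7}
+//	maxabs := impl.Dlangt(lapack.MaxAbs, 3, dl, d, du) // maxabs == 7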
+func (impl Implementation) Dlangt(norm lapack.MatrixNorm, n int, dl, d, du []float64) float64 { + switch { + case norm != lapack.MaxAbs && norm != lapack.MaxRowSum && norm != lapack.MaxColumnSum && norm != lapack.Frobenius: + panic(badNorm) + case n < 0: + panic(nLT0) + } + + if n == 0 { + return 0 + } + + switch { + case len(dl) < n-1: + panic(shortDL) + case len(d) < n: + panic(shortD) + case len(du) < n-1: + panic(shortDU) + } + + dl = dl[:n-1] + d = d[:n] + du = du[:n-1] + + var anorm float64 + switch norm { + case lapack.MaxAbs: + for _, diag := range [][]float64{dl, d, du} { + for _, di := range diag { + if math.IsNaN(di) { + return di + } + di = math.Abs(di) + if di > anorm { + anorm = di + } + } + } + case lapack.MaxColumnSum: + if n == 1 { + return math.Abs(d[0]) + } + anorm = math.Abs(d[0]) + math.Abs(dl[0]) + if math.IsNaN(anorm) { + return anorm + } + tmp := math.Abs(du[n-2]) + math.Abs(d[n-1]) + if math.IsNaN(tmp) { + return tmp + } + if tmp > anorm { + anorm = tmp + } + for i := 1; i < n-1; i++ { + tmp = math.Abs(du[i-1]) + math.Abs(d[i]) + math.Abs(dl[i]) + if math.IsNaN(tmp) { + return tmp + } + if tmp > anorm { + anorm = tmp + } + } + case lapack.MaxRowSum: + if n == 1 { + return math.Abs(d[0]) + } + anorm = math.Abs(d[0]) + math.Abs(du[0]) + if math.IsNaN(anorm) { + return anorm + } + tmp := math.Abs(dl[n-2]) + math.Abs(d[n-1]) + if math.IsNaN(tmp) { + return tmp + } + if tmp > anorm { + anorm = tmp + } + for i := 1; i < n-1; i++ { + tmp = math.Abs(dl[i-1]) + math.Abs(d[i]) + math.Abs(du[i]) + if math.IsNaN(tmp) { + return tmp + } + if tmp > anorm { + anorm = tmp + } + } + case lapack.Frobenius: + scale := 0.0 + ssq := 1.0 + scale, ssq = impl.Dlassq(n, d, 1, scale, ssq) + if n > 1 { + scale, ssq = impl.Dlassq(n-1, dl, 1, scale, ssq) + scale, ssq = impl.Dlassq(n-1, du, 1, scale, ssq) + } + anorm = scale * math.Sqrt(ssq) + } + return anorm +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlanhs.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlanhs.go new file mode 100644 index 0000000000..054b90f02b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlanhs.go @@ -0,0 +1,78 @@ +// Copyright ©2023 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dlanhs returns the value of the one norm, or the Frobenius norm, or the +// infinity norm, or the element of largest absolute value of a Hessenberg +// matrix A. +// +// If norm is lapack.MaxColumnSum, work must have length at least n. 
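+//
+// A minimal usage sketch (illustrative only; assumes an Implementation
+// value named impl):
+//
+//	// 3×3 upper Hessenberg matrix in row-major order.
+//	a := []float64{
+//		1, 2, 3,
+//		4, 5, 6,
+//		0, 7, 8,
+//	}
+//	// The infinity norm is the largest row sum of absolute values.
+//	inf := impl.Dlanhs(lapack.MaxRowSum, 3, a, 3, nil) // inf == 15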
+func (impl Implementation) Dlanhs(norm lapack.MatrixNorm, n int, a []float64, lda int, work []float64) float64 { + switch { + case norm != lapack.MaxRowSum && norm != lapack.MaxAbs && norm != lapack.MaxColumnSum && norm != lapack.Frobenius: + panic(badNorm) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + if n == 0 { + return 0 + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case norm == lapack.MaxColumnSum && len(work) < n: + panic(shortWork) + } + + bi := blas64.Implementation() + var value float64 + switch norm { + case lapack.MaxAbs: + for i := 0; i < n; i++ { + minj := max(0, i-1) + for _, v := range a[i*lda+minj : i*lda+n] { + value = math.Max(value, math.Abs(v)) + } + } + case lapack.MaxColumnSum: + for i := 0; i < n; i++ { + work[i] = 0 + } + for i := 0; i < n; i++ { + for j := max(0, i-1); j < n; j++ { + work[j] += math.Abs(a[i*lda+j]) + } + } + for _, v := range work[:n] { + value = math.Max(value, v) + } + case lapack.MaxRowSum: + for i := 0; i < n; i++ { + minj := max(0, i-1) + sum := bi.Dasum(n-minj, a[i*lda+minj:], 1) + value = math.Max(value, sum) + } + case lapack.Frobenius: + scale := 0.0 + sum := 1.0 + for i := 0; i < n; i++ { + minj := max(0, i-1) + scale, sum = impl.Dlassq(n-minj, a[i*lda+minj:], 1, scale, sum) + } + value = scale * math.Sqrt(sum) + } + return value +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlansb.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlansb.go new file mode 100644 index 0000000000..17801f84b6 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlansb.go @@ -0,0 +1,131 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dlansb returns the given norm of an n×n symmetric band matrix with kd +// super-diagonals. +// +// When norm is lapack.MaxColumnSum or lapack.MaxRowSum, the length of work must +// be at least n. +func (impl Implementation) Dlansb(norm lapack.MatrixNorm, uplo blas.Uplo, n, kd int, ab []float64, ldab int, work []float64) float64 { + switch { + case norm != lapack.MaxAbs && norm != lapack.MaxRowSum && norm != lapack.MaxColumnSum && norm != lapack.Frobenius: + panic(badNorm) + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case kd < 0: + panic(kdLT0) + case ldab < kd+1: + panic(badLdA) + } + + // Quick return if possible. 
+ if n == 0 { + return 0 + } + + switch { + case len(ab) < (n-1)*ldab+kd+1: + panic(shortAB) + case len(work) < n && (norm == lapack.MaxColumnSum || norm == lapack.MaxRowSum): + panic(shortWork) + } + + var value float64 + switch norm { + case lapack.MaxAbs: + if uplo == blas.Upper { + for i := 0; i < n; i++ { + for j := 0; j < min(n-i, kd+1); j++ { + aij := math.Abs(ab[i*ldab+j]) + if aij > value || math.IsNaN(aij) { + value = aij + } + } + } + } else { + for i := 0; i < n; i++ { + for j := max(0, kd-i); j < kd+1; j++ { + aij := math.Abs(ab[i*ldab+j]) + if aij > value || math.IsNaN(aij) { + value = aij + } + } + } + } + case lapack.MaxColumnSum, lapack.MaxRowSum: + work = work[:n] + var sum float64 + if uplo == blas.Upper { + for i := range work { + work[i] = 0 + } + for i := 0; i < n; i++ { + sum := work[i] + math.Abs(ab[i*ldab]) + for j := i + 1; j < min(i+kd+1, n); j++ { + aij := math.Abs(ab[i*ldab+j-i]) + sum += aij + work[j] += aij + } + if sum > value || math.IsNaN(sum) { + value = sum + } + } + } else { + for i := 0; i < n; i++ { + sum = 0 + for j := max(0, i-kd); j < i; j++ { + aij := math.Abs(ab[i*ldab+kd+j-i]) + sum += aij + work[j] += aij + } + work[i] = sum + math.Abs(ab[i*ldab+kd]) + } + for _, sum := range work { + if sum > value || math.IsNaN(sum) { + value = sum + } + } + } + case lapack.Frobenius: + scale := 0.0 + sum := 1.0 + if uplo == blas.Upper { + if kd > 0 { + // Sum off-diagonals. + for i := 0; i < n-1; i++ { + ilen := min(n-i-1, kd) + scale, sum = impl.Dlassq(ilen, ab[i*ldab+1:], 1, scale, sum) + } + sum *= 2 + } + // Sum diagonal. + scale, sum = impl.Dlassq(n, ab, ldab, scale, sum) + } else { + if kd > 0 { + // Sum off-diagonals. + for i := 1; i < n; i++ { + ilen := min(i, kd) + scale, sum = impl.Dlassq(ilen, ab[i*ldab+kd-ilen:], 1, scale, sum) + } + sum *= 2 + } + // Sum diagonal. + scale, sum = impl.Dlassq(n, ab[kd:], ldab, scale, sum) + } + value = scale * math.Sqrt(sum) + } + + return value +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlanst.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlanst.go new file mode 100644 index 0000000000..9ca1897e34 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlanst.go @@ -0,0 +1,75 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/lapack" +) + +// Dlanst computes the specified norm of a symmetric tridiagonal matrix A. +// The diagonal elements of A are stored in d and the off-diagonal elements +// are stored in e. 
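+//
+// A minimal usage sketch (illustrative only; assumes an Implementation
+// value named impl):
+//
+//	// Symmetric tridiagonal matrix with diagonal d and off-diagonal e:
+//	// | 2 1 0 |
+//	// | 1 3 1 |
+//	// | 0 1 4 |
+//	d := []float64{2, 3, 4}
+//	e := []float64{1, 1}
+//	one := impl.Dlanst(lapack.MaxColumnSum, 3, d, e) // one == 5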
+func (impl Implementation) Dlanst(norm lapack.MatrixNorm, n int, d, e []float64) float64 { + switch { + case norm != lapack.MaxRowSum && norm != lapack.MaxColumnSum && norm != lapack.Frobenius && norm != lapack.MaxAbs: + panic(badNorm) + case n < 0: + panic(nLT0) + } + if n == 0 { + return 0 + } + switch { + case len(d) < n: + panic(shortD) + case len(e) < n-1: + panic(shortE) + } + + switch norm { + default: + panic(badNorm) + case lapack.MaxAbs: + anorm := math.Abs(d[n-1]) + for i := 0; i < n-1; i++ { + sum := math.Abs(d[i]) + if anorm < sum || math.IsNaN(sum) { + anorm = sum + } + sum = math.Abs(e[i]) + if anorm < sum || math.IsNaN(sum) { + anorm = sum + } + } + return anorm + case lapack.MaxColumnSum, lapack.MaxRowSum: + if n == 1 { + return math.Abs(d[0]) + } + anorm := math.Abs(d[0]) + math.Abs(e[0]) + sum := math.Abs(e[n-2]) + math.Abs(d[n-1]) + if anorm < sum || math.IsNaN(sum) { + anorm = sum + } + for i := 1; i < n-1; i++ { + sum := math.Abs(d[i]) + math.Abs(e[i]) + math.Abs(e[i-1]) + if anorm < sum || math.IsNaN(sum) { + anorm = sum + } + } + return anorm + case lapack.Frobenius: + var scale float64 + sum := 1.0 + if n > 1 { + scale, sum = impl.Dlassq(n-1, e, 1, scale, sum) + sum = 2 * sum + } + scale, sum = impl.Dlassq(n, d, 1, scale, sum) + return scale * math.Sqrt(sum) + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlansy.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlansy.go new file mode 100644 index 0000000000..b972c72e55 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlansy.go @@ -0,0 +1,125 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dlansy returns the value of the specified norm of an n×n symmetric matrix. If +// norm == lapack.MaxColumnSum or norm == lapack.MaxRowSum, work must have length +// at least n, otherwise work is unused. +func (impl Implementation) Dlansy(norm lapack.MatrixNorm, uplo blas.Uplo, n int, a []float64, lda int, work []float64) float64 { + switch { + case norm != lapack.MaxRowSum && norm != lapack.MaxColumnSum && norm != lapack.Frobenius && norm != lapack.MaxAbs: + panic(badNorm) + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. + if n == 0 { + return 0 + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case (norm == lapack.MaxColumnSum || norm == lapack.MaxRowSum) && len(work) < n: + panic(shortWork) + } + + switch norm { + case lapack.MaxAbs: + if uplo == blas.Upper { + var max float64 + for i := 0; i < n; i++ { + for j := i; j < n; j++ { + v := math.Abs(a[i*lda+j]) + if math.IsNaN(v) { + return math.NaN() + } + if v > max { + max = v + } + } + } + return max + } + var max float64 + for i := 0; i < n; i++ { + for j := 0; j <= i; j++ { + v := math.Abs(a[i*lda+j]) + if math.IsNaN(v) { + return math.NaN() + } + if v > max { + max = v + } + } + } + return max + case lapack.MaxRowSum, lapack.MaxColumnSum: + // A symmetric matrix has the same 1-norm and ∞-norm. 
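+		// (Row i of A equals column i of Aᵀ = A, so the largest row sum
+		// and the largest column sum coincide; a single accumulation
+		// over the stored triangle therefore serves both norms.)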
+ for i := 0; i < n; i++ { + work[i] = 0 + } + if uplo == blas.Upper { + for i := 0; i < n; i++ { + work[i] += math.Abs(a[i*lda+i]) + for j := i + 1; j < n; j++ { + v := math.Abs(a[i*lda+j]) + work[i] += v + work[j] += v + } + } + } else { + for i := 0; i < n; i++ { + for j := 0; j < i; j++ { + v := math.Abs(a[i*lda+j]) + work[i] += v + work[j] += v + } + work[i] += math.Abs(a[i*lda+i]) + } + } + var max float64 + for i := 0; i < n; i++ { + v := work[i] + if math.IsNaN(v) { + return math.NaN() + } + if v > max { + max = v + } + } + return max + default: + // lapack.Frobenius: + scale := 0.0 + sum := 1.0 + // Sum off-diagonals. + if uplo == blas.Upper { + for i := 0; i < n-1; i++ { + scale, sum = impl.Dlassq(n-i-1, a[i*lda+i+1:], 1, scale, sum) + } + } else { + for i := 1; i < n; i++ { + scale, sum = impl.Dlassq(i, a[i*lda:], 1, scale, sum) + } + } + sum *= 2 + // Sum diagonal. + scale, sum = impl.Dlassq(n, a, lda+1, scale, sum) + return scale * math.Sqrt(sum) + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlantb.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlantb.go new file mode 100644 index 0000000000..ceab2a6af3 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlantb.go @@ -0,0 +1,209 @@ +// Copyright ©2020 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dlantb returns the value of the given norm of an n×n triangular band matrix A +// with k+1 diagonals. +// +// When norm is lapack.MaxColumnSum, the length of work must be at least n. +func (impl Implementation) Dlantb(norm lapack.MatrixNorm, uplo blas.Uplo, diag blas.Diag, n, k int, a []float64, lda int, work []float64) float64 { + switch { + case norm != lapack.MaxAbs && norm != lapack.MaxRowSum && norm != lapack.MaxColumnSum && norm != lapack.Frobenius: + panic(badNorm) + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case k < 0: + panic(kdLT0) + case lda < k+1: + panic(badLdA) + } + + // Quick return if possible. 
+ if n == 0 { + return 0 + } + + switch { + case len(a) < (n-1)*lda+k+1: + panic(shortAB) + case len(work) < n && norm == lapack.MaxColumnSum: + panic(shortWork) + } + + var value float64 + switch norm { + case lapack.MaxAbs: + if uplo == blas.Upper { + var jfirst int + if diag == blas.Unit { + value = 1 + jfirst = 1 + } + for i := 0; i < n; i++ { + for _, aij := range a[i*lda+jfirst : i*lda+min(n-i, k+1)] { + if math.IsNaN(aij) { + return aij + } + aij = math.Abs(aij) + if aij > value { + value = aij + } + } + } + } else { + jlast := k + 1 + if diag == blas.Unit { + value = 1 + jlast = k + } + for i := 0; i < n; i++ { + for _, aij := range a[i*lda+max(0, k-i) : i*lda+jlast] { + if math.IsNaN(aij) { + return math.NaN() + } + aij = math.Abs(aij) + if aij > value { + value = aij + } + } + } + } + case lapack.MaxRowSum: + var sum float64 + if uplo == blas.Upper { + var jfirst int + if diag == blas.Unit { + jfirst = 1 + } + for i := 0; i < n; i++ { + sum = 0 + if diag == blas.Unit { + sum = 1 + } + for _, aij := range a[i*lda+jfirst : i*lda+min(n-i, k+1)] { + sum += math.Abs(aij) + } + if math.IsNaN(sum) { + return math.NaN() + } + if sum > value { + value = sum + } + } + } else { + jlast := k + 1 + if diag == blas.Unit { + jlast = k + } + for i := 0; i < n; i++ { + sum = 0 + if diag == blas.Unit { + sum = 1 + } + for _, aij := range a[i*lda+max(0, k-i) : i*lda+jlast] { + sum += math.Abs(aij) + } + if math.IsNaN(sum) { + return math.NaN() + } + if sum > value { + value = sum + } + } + } + case lapack.MaxColumnSum: + work = work[:n] + if diag == blas.Unit { + for i := range work { + work[i] = 1 + } + } else { + for i := range work { + work[i] = 0 + } + } + if uplo == blas.Upper { + var jfirst int + if diag == blas.Unit { + jfirst = 1 + } + for i := 0; i < n; i++ { + for j, aij := range a[i*lda+jfirst : i*lda+min(n-i, k+1)] { + work[i+jfirst+j] += math.Abs(aij) + } + } + } else { + jlast := k + 1 + if diag == blas.Unit { + jlast = k + } + for i := 0; i < n; i++ { + off := max(0, k-i) + for j, aij := range a[i*lda+off : i*lda+jlast] { + work[i+j+off-k] += math.Abs(aij) + } + } + } + for _, wi := range work { + if math.IsNaN(wi) { + return math.NaN() + } + if wi > value { + value = wi + } + } + case lapack.Frobenius: + var scale, sum float64 + switch uplo { + case blas.Upper: + if diag == blas.Unit { + scale = 1 + sum = float64(n) + if k > 0 { + for i := 0; i < n-1; i++ { + ilen := min(n-i-1, k) + scale, sum = impl.Dlassq(ilen, a[i*lda+1:], 1, scale, sum) + } + } + } else { + scale = 0 + sum = 1 + for i := 0; i < n; i++ { + ilen := min(n-i, k+1) + scale, sum = impl.Dlassq(ilen, a[i*lda:], 1, scale, sum) + } + } + case blas.Lower: + if diag == blas.Unit { + scale = 1 + sum = float64(n) + if k > 0 { + for i := 1; i < n; i++ { + ilen := min(i, k) + scale, sum = impl.Dlassq(ilen, a[i*lda+k-ilen:], 1, scale, sum) + } + } + } else { + scale = 0 + sum = 1 + for i := 0; i < n; i++ { + ilen := min(i, k) + 1 + scale, sum = impl.Dlassq(ilen, a[i*lda+k+1-ilen:], 1, scale, sum) + } + } + } + value = scale * math.Sqrt(sum) + } + return value +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlantr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlantr.go new file mode 100644 index 0000000000..33569832fd --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlantr.go @@ -0,0 +1,252 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dlantr computes the specified norm of an m×n trapezoidal matrix A. If +// norm == lapack.MaxColumnSum work must have length at least n, otherwise work +// is unused. +func (impl Implementation) Dlantr(norm lapack.MatrixNorm, uplo blas.Uplo, diag blas.Diag, m, n int, a []float64, lda int, work []float64) float64 { + switch { + case norm != lapack.MaxRowSum && norm != lapack.MaxColumnSum && norm != lapack.Frobenius && norm != lapack.MaxAbs: + panic(badNorm) + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case diag != blas.Unit && diag != blas.NonUnit: + panic(badDiag) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. + minmn := min(m, n) + if minmn == 0 { + return 0 + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case norm == lapack.MaxColumnSum && len(work) < n: + panic(shortWork) + } + + switch norm { + case lapack.MaxAbs: + if diag == blas.Unit { + value := 1.0 + if uplo == blas.Upper { + for i := 0; i < m; i++ { + for j := i + 1; j < n; j++ { + tmp := math.Abs(a[i*lda+j]) + if math.IsNaN(tmp) { + return tmp + } + if tmp > value { + value = tmp + } + } + } + return value + } + for i := 1; i < m; i++ { + for j := 0; j < min(i, n); j++ { + tmp := math.Abs(a[i*lda+j]) + if math.IsNaN(tmp) { + return tmp + } + if tmp > value { + value = tmp + } + } + } + return value + } + var value float64 + if uplo == blas.Upper { + for i := 0; i < m; i++ { + for j := i; j < n; j++ { + tmp := math.Abs(a[i*lda+j]) + if math.IsNaN(tmp) { + return tmp + } + if tmp > value { + value = tmp + } + } + } + return value + } + for i := 0; i < m; i++ { + for j := 0; j <= min(i, n-1); j++ { + tmp := math.Abs(a[i*lda+j]) + if math.IsNaN(tmp) { + return tmp + } + if tmp > value { + value = tmp + } + } + } + return value + case lapack.MaxColumnSum: + if diag == blas.Unit { + for i := 0; i < minmn; i++ { + work[i] = 1 + } + for i := minmn; i < n; i++ { + work[i] = 0 + } + if uplo == blas.Upper { + for i := 0; i < m; i++ { + for j := i + 1; j < n; j++ { + work[j] += math.Abs(a[i*lda+j]) + } + } + } else { + for i := 1; i < m; i++ { + for j := 0; j < min(i, n); j++ { + work[j] += math.Abs(a[i*lda+j]) + } + } + } + } else { + for i := 0; i < n; i++ { + work[i] = 0 + } + if uplo == blas.Upper { + for i := 0; i < m; i++ { + for j := i; j < n; j++ { + work[j] += math.Abs(a[i*lda+j]) + } + } + } else { + for i := 0; i < m; i++ { + for j := 0; j <= min(i, n-1); j++ { + work[j] += math.Abs(a[i*lda+j]) + } + } + } + } + var max float64 + for _, v := range work[:n] { + if math.IsNaN(v) { + return math.NaN() + } + if v > max { + max = v + } + } + return max + case lapack.MaxRowSum: + var maxsum float64 + if diag == blas.Unit { + if uplo == blas.Upper { + for i := 0; i < m; i++ { + var sum float64 + if i < minmn { + sum = 1 + } + for j := i + 1; j < n; j++ { + sum += math.Abs(a[i*lda+j]) + } + if math.IsNaN(sum) { + return math.NaN() + } + if sum > maxsum { + maxsum = sum + } + } + return maxsum + } else { + for i := 0; i < m; i++ { + var sum float64 + if i < minmn { + sum = 1 + } + for j := 0; j < min(i, n); j++ { + sum += math.Abs(a[i*lda+j]) + } + if math.IsNaN(sum) { + return math.NaN() + } + if sum > maxsum { + maxsum = sum + } + } + return maxsum + } + } else { + if uplo == blas.Upper { + for i := 0; i < m; i++ { + var sum float64 + for j := i; j < n; j++ { + sum += math.Abs(a[i*lda+j]) + } + if 
math.IsNaN(sum) { + return sum + } + if sum > maxsum { + maxsum = sum + } + } + return maxsum + } else { + for i := 0; i < m; i++ { + var sum float64 + for j := 0; j <= min(i, n-1); j++ { + sum += math.Abs(a[i*lda+j]) + } + if math.IsNaN(sum) { + return sum + } + if sum > maxsum { + maxsum = sum + } + } + return maxsum + } + } + default: + // lapack.Frobenius: + var scale, sum float64 + if diag == blas.Unit { + scale = 1 + sum = float64(min(m, n)) + if uplo == blas.Upper { + for i := 0; i < min(m, n); i++ { + scale, sum = impl.Dlassq(n-i-1, a[i*lda+i+1:], 1, scale, sum) + } + } else { + for i := 1; i < m; i++ { + scale, sum = impl.Dlassq(min(i, n), a[i*lda:], 1, scale, sum) + } + } + } else { + scale = 0 + sum = 1 + if uplo == blas.Upper { + for i := 0; i < min(m, n); i++ { + scale, sum = impl.Dlassq(n-i, a[i*lda+i:], 1, scale, sum) + } + } else { + for i := 0; i < m; i++ { + scale, sum = impl.Dlassq(min(i+1, n), a[i*lda:], 1, scale, sum) + } + } + } + return scale * math.Sqrt(sum) + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlanv2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlanv2.go new file mode 100644 index 0000000000..360f71b1d3 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlanv2.go @@ -0,0 +1,151 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlanv2 computes the Schur factorization of a real 2×2 matrix: +// +// [ a b ] = [ cs -sn ] * [ aa bb ] * [ cs sn ] +// [ c d ] [ sn cs ] [ cc dd ] * [-sn cs ] +// +// If cc is zero, aa and dd are real eigenvalues of the matrix. Otherwise it +// holds that aa = dd and bb*cc < 0, and aa ± sqrt(bb*cc) are complex conjugate +// eigenvalues. The real and imaginary parts of the eigenvalues are returned in +// (rt1r,rt1i) and (rt2r,rt2i). +func (impl Implementation) Dlanv2(a, b, c, d float64) (aa, bb, cc, dd float64, rt1r, rt1i, rt2r, rt2i float64, cs, sn float64) { + switch { + case c == 0: // Matrix is already upper triangular. + aa = a + bb = b + cc = 0 + dd = d + cs = 1 + sn = 0 + case b == 0: // Matrix is lower triangular, swap rows and columns. + aa = d + bb = -c + cc = 0 + dd = a + cs = 0 + sn = 1 + case a == d && math.Signbit(b) != math.Signbit(c): // Matrix is already in the standard Schur form. + aa = a + bb = b + cc = c + dd = d + cs = 1 + sn = 0 + default: + temp := a - d + p := temp / 2 + bcmax := math.Max(math.Abs(b), math.Abs(c)) + bcmis := math.Min(math.Abs(b), math.Abs(c)) + if b*c < 0 { + bcmis *= -1 + } + scale := math.Max(math.Abs(p), bcmax) + z := p/scale*p + bcmax/scale*bcmis + eps := dlamchP + + if z >= 4*eps { + // Real eigenvalues. Compute aa and dd. + if p > 0 { + z = p + math.Sqrt(scale)*math.Sqrt(z) + } else { + z = p - math.Sqrt(scale)*math.Sqrt(z) + } + aa = d + z + dd = d - bcmax/z*bcmis + // Compute bb and the rotation matrix. + tau := impl.Dlapy2(c, z) + cs = z / tau + sn = c / tau + bb = b - c + cc = 0 + } else { + // Complex eigenvalues, or real (almost) equal eigenvalues. + // Make diagonal elements equal. 
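+			// (safmn2 below is approximately sqrt(safe minimum / eps);
+			// sigma and temp are rescaled by powers of it so that the
+			// hypot-style computation of tau can neither overflow nor
+			// underflow.)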
+ safmn2 := math.Pow(dlamchB, math.Log(dlamchS/dlamchE)/math.Log(dlamchB)/2) + safmx2 := 1 / safmn2 + sigma := b + c + loop: + for iter := 0; iter < 20; iter++ { + scale = math.Max(math.Abs(temp), math.Abs(sigma)) + switch { + case scale >= safmx2: + sigma *= safmn2 + temp *= safmn2 + case scale <= safmn2: + sigma *= safmx2 + temp *= safmx2 + default: + break loop + } + } + p = temp / 2 + tau := impl.Dlapy2(sigma, temp) + cs = math.Sqrt((1 + math.Abs(sigma)/tau) / 2) + sn = -p / (tau * cs) + if sigma < 0 { + sn *= -1 + } + // Compute [ aa bb ] = [ a b ] [ cs -sn ] + // [ cc dd ] [ c d ] [ sn cs ] + aa = a*cs + b*sn + bb = -a*sn + b*cs + cc = c*cs + d*sn + dd = -c*sn + d*cs + // Compute [ a b ] = [ cs sn ] [ aa bb ] + // [ c d ] [-sn cs ] [ cc dd ] + a = aa*cs + cc*sn + b = bb*cs + dd*sn + c = -aa*sn + cc*cs + d = -bb*sn + dd*cs + + temp = (a + d) / 2 + aa = temp + bb = b + cc = c + dd = temp + + if cc != 0 { + if bb != 0 { + if math.Signbit(bb) == math.Signbit(cc) { + // Real eigenvalues, reduce to + // upper triangular form. + sab := math.Sqrt(math.Abs(bb)) + sac := math.Sqrt(math.Abs(cc)) + p = sab * sac + if cc < 0 { + p *= -1 + } + tau = 1 / math.Sqrt(math.Abs(bb+cc)) + aa = temp + p + bb = bb - cc + cc = 0 + dd = temp - p + cs1 := sab * tau + sn1 := sac * tau + cs, sn = cs*cs1-sn*sn1, cs*sn1+sn*cs1 + } + } else { + bb = -cc + cc = 0 + cs, sn = -sn, cs + } + } + } + } + + // Store eigenvalues in (rt1r,rt1i) and (rt2r,rt2i). + rt1r = aa + rt2r = dd + if cc != 0 { + rt1i = math.Sqrt(math.Abs(bb)) * math.Sqrt(math.Abs(cc)) + rt2i = -rt1i + } + return +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlapll.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlapll.go new file mode 100644 index 0000000000..bf98c338eb --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlapll.go @@ -0,0 +1,55 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas/blas64" + +// Dlapll returns the smallest singular value of the n×2 matrix A = [ x y ]. +// The function first computes the QR factorization of A = Q*R, and then computes +// the SVD of the 2-by-2 upper triangular matrix r. +// +// The contents of x and y are overwritten during the call. +// +// Dlapll is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlapll(n int, x []float64, incX int, y []float64, incY int) float64 { + switch { + case n < 0: + panic(nLT0) + case incX <= 0: + panic(badIncX) + case incY <= 0: + panic(badIncY) + } + + // Quick return if possible. + if n == 0 { + return 0 + } + + switch { + case len(x) < 1+(n-1)*incX: + panic(shortX) + case len(y) < 1+(n-1)*incY: + panic(shortY) + } + + // Quick return if possible. + if n == 1 { + return 0 + } + + // Compute the QR factorization of the N-by-2 matrix [ X Y ]. + a00, tau := impl.Dlarfg(n, x[0], x[incX:], incX) + x[0] = 1 + + bi := blas64.Implementation() + c := -tau * bi.Ddot(n, x, incX, y, incY) + bi.Daxpy(n, c, x, incX, y, incY) + a11, _ := impl.Dlarfg(n-1, y[incY], y[2*incY:], incY) + + // Compute the SVD of 2-by-2 upper triangular matrix. 
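+	// The triangular factor is
+	//
+	//	R = [ a00 y[0] ]
+	//	    [ 0   a11  ]
+	//
+	// and Dlas2 computes its singular values; only the smaller is needed.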
+ ssmin, _ := impl.Dlas2(a00, y[0], a11) + return ssmin +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlapmr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlapmr.go new file mode 100644 index 0000000000..73cd82db96 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlapmr.go @@ -0,0 +1,88 @@ +// Copyright ©2022 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas/blas64" + +// Dlapmr rearranges the rows of the m×n matrix X as specified by the permutation +// k[0],k[1],...,k[m-1] of the integers 0,...,m-1. +// +// If forward is true, a forward permutation is applied: +// +// X[k[i],0:n] is moved to X[i,0:n] for i=0,1,...,m-1. +// +// If forward is false, a backward permutation is applied: +// +// X[i,0:n] is moved to X[k[i],0:n] for i=0,1,...,m-1. +// +// k must have length m, otherwise Dlapmr will panic. +func (impl Implementation) Dlapmr(forward bool, m, n int, x []float64, ldx int, k []int) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case ldx < max(1, n): + panic(badLdX) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + switch { + case len(x) < (m-1)*ldx+n: + panic(shortX) + case len(k) != m: + panic(badLenK) + } + + // Quick return if possible. + if m == 1 { + return + } + + bi := blas64.Implementation() + + for i, ki := range k { + k[i] = -(ki + 1) + } + if forward { + for i, ki := range k { + if ki >= 0 { + continue + } + j := i + k[j] = -k[j] - 1 + in := k[j] + for { + if k[in] >= 0 { + break + } + bi.Dswap(n, x[j*ldx:], 1, x[in*ldx:], 1) + k[in] = -k[in] - 1 + j = in + in = k[in] + } + } + } else { + for i, ki := range k { + if ki >= 0 { + continue + } + k[i] = -ki - 1 + j := k[i] + for { + if j == i { + break + } + bi.Dswap(n, x[i*ldx:], 1, x[j*ldx:], 1) + k[j] = -k[j] - 1 + j = k[j] + } + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlapmt.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlapmt.go new file mode 100644 index 0000000000..4a70e68f04 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlapmt.go @@ -0,0 +1,89 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas/blas64" + +// Dlapmt rearranges the columns of the m×n matrix X as specified by the +// permutation k_0, k_1, ..., k_n-1 of the integers 0, ..., n-1. +// +// If forward is true a forward permutation is performed: +// +// X[0:m, k[j]] is moved to X[0:m, j] for j = 0, 1, ..., n-1. +// +// otherwise a backward permutation is performed: +// +// X[0:m, j] is moved to X[0:m, k[j]] for j = 0, 1, ..., n-1. +// +// k must have length n, otherwise Dlapmt will panic. k is zero-indexed. +func (impl Implementation) Dlapmt(forward bool, m, n int, x []float64, ldx int, k []int) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case ldx < max(1, n): + panic(badLdX) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + switch { + case len(x) < (m-1)*ldx+n: + panic(shortX) + case len(k) != n: + panic(badLenK) + } + + // Quick return if possible. 
+ if n == 1 { + return + } + + for i, v := range k { + v++ + k[i] = -v + } + + bi := blas64.Implementation() + + if forward { + for j, v := range k { + if v >= 0 { + continue + } + k[j] = -v + i := -v - 1 + for k[i] < 0 { + bi.Dswap(m, x[j:], ldx, x[i:], ldx) + + k[i] = -k[i] + j = i + i = k[i] - 1 + } + } + } else { + for i, v := range k { + if v >= 0 { + continue + } + k[i] = -v + j := -v - 1 + for j != i { + bi.Dswap(m, x[j:], ldx, x[i:], ldx) + + k[j] = -k[j] + j = k[j] - 1 + } + } + } + + for i := range k { + k[i]-- + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlapy2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlapy2.go new file mode 100644 index 0000000000..19f73ffabd --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlapy2.go @@ -0,0 +1,14 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlapy2 is the LAPACK version of math.Hypot. +// +// Dlapy2 is an internal routine. It is exported for testing purposes. +func (Implementation) Dlapy2(x, y float64) float64 { + return math.Hypot(x, y) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqp2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqp2.go new file mode 100644 index 0000000000..cc3bc06db6 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqp2.go @@ -0,0 +1,127 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlaqp2 computes a QR factorization with column pivoting of the block A[offset:m, 0:n] +// of the m×n matrix A. The block A[0:offset, 0:n] is accordingly pivoted, but not factorized. +// +// On exit, the upper triangle of block A[offset:m, 0:n] is the triangular factor obtained. +// The elements in block A[offset:m, 0:n] below the diagonal, together with tau, represent +// the orthogonal matrix Q as a product of elementary reflectors. +// +// offset is number of rows of the matrix A that must be pivoted but not factorized. +// offset must not be negative otherwise Dlaqp2 will panic. +// +// On exit, jpvt holds the permutation that was applied; the jth column of A*P was the +// jpvt[j] column of A. jpvt must have length n, otherwise Dlaqp2 will panic. +// +// On exit tau holds the scalar factors of the elementary reflectors. It must have length +// at least min(m-offset, n) otherwise Dlaqp2 will panic. +// +// vn1 and vn2 hold the partial and complete column norms respectively. They must have length n, +// otherwise Dlaqp2 will panic. +// +// work must have length n, otherwise Dlaqp2 will panic. +// +// Dlaqp2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlaqp2(m, n, offset int, a []float64, lda int, jpvt []int, tau, vn1, vn2, work []float64) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case offset < 0: + panic(offsetLT0) + case offset > m: + panic(offsetGTM) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. 
+ if m == 0 || n == 0 { + return + } + + mn := min(m-offset, n) + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(jpvt) != n: + panic(badLenJpvt) + case len(tau) < mn: + panic(shortTau) + case len(vn1) < n: + panic(shortVn1) + case len(vn2) < n: + panic(shortVn2) + case len(work) < n: + panic(shortWork) + } + + tol3z := math.Sqrt(dlamchE) + + bi := blas64.Implementation() + + // Compute factorization. + for i := 0; i < mn; i++ { + offpi := offset + i + + // Determine ith pivot column and swap if necessary. + p := i + bi.Idamax(n-i, vn1[i:], 1) + if p != i { + bi.Dswap(m, a[p:], lda, a[i:], lda) + jpvt[p], jpvt[i] = jpvt[i], jpvt[p] + vn1[p] = vn1[i] + vn2[p] = vn2[i] + } + + // Generate elementary reflector H_i. + if offpi < m-1 { + a[offpi*lda+i], tau[i] = impl.Dlarfg(m-offpi, a[offpi*lda+i], a[(offpi+1)*lda+i:], lda) + } else { + tau[i] = 0 + } + + if i < n-1 { + // Apply H_iᵀ to A[offset+i:m, i:n] from the left. + aii := a[offpi*lda+i] + a[offpi*lda+i] = 1 + impl.Dlarf(blas.Left, m-offpi, n-i-1, a[offpi*lda+i:], lda, tau[i], a[offpi*lda+i+1:], lda, work) + a[offpi*lda+i] = aii + } + + // Update partial column norms. + for j := i + 1; j < n; j++ { + if vn1[j] == 0 { + continue + } + + // The following marked lines follow from the + // analysis in Lapack Working Note 176. + r := math.Abs(a[offpi*lda+j]) / vn1[j] // * + temp := math.Max(0, 1-r*r) // * + r = vn1[j] / vn2[j] // * + temp2 := temp * r * r // * + if temp2 < tol3z { + var v float64 + if offpi < m-1 { + v = bi.Dnrm2(m-offpi-1, a[(offpi+1)*lda+j:], lda) + } + vn1[j] = v + vn2[j] = v + } else { + vn1[j] *= math.Sqrt(temp) // * + } + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqps.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqps.go new file mode 100644 index 0000000000..da1a60e5cf --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqps.go @@ -0,0 +1,244 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlaqps computes a step of QR factorization with column pivoting +// of an m×n matrix A by using Blas-3. It tries to factorize nb +// columns from A starting from the row offset, and updates all +// of the matrix with Dgemm. +// +// In some cases, due to catastrophic cancellations, it cannot +// factorize nb columns. Hence, the actual number of factorized +// columns is returned in kb. +// +// Dlaqps computes a QR factorization with column pivoting of the +// block A[offset:m, 0:nb] of the m×n matrix A. The block +// A[0:offset, 0:n] is accordingly pivoted, but not factorized. +// +// On exit, the upper triangle of block A[offset:m, 0:kb] is the +// triangular factor obtained. The elements in block A[offset:m, 0:n] +// below the diagonal, together with tau, represent the orthogonal +// matrix Q as a product of elementary reflectors. +// +// offset is number of rows of the matrix A that must be pivoted but +// not factorized. offset must not be negative otherwise Dlaqps will panic. +// +// On exit, jpvt holds the permutation that was applied; the jth column +// of A*P was the jpvt[j] column of A. jpvt must have length n, +// otherwise Dlapqs will panic. +// +// On exit tau holds the scalar factors of the elementary reflectors. +// It must have length nb, otherwise Dlapqs will panic. +// +// vn1 and vn2 hold the partial and complete column norms respectively. 
+// They must have length n, otherwise Dlapqs will panic. +// +// auxv must have length nb, otherwise Dlaqps will panic. +// +// f and ldf represent an n×nb matrix F that is overwritten during the +// call. +// +// Dlaqps is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlaqps(m, n, offset, nb int, a []float64, lda int, jpvt []int, tau, vn1, vn2, auxv, f []float64, ldf int) (kb int) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case offset < 0: + panic(offsetLT0) + case offset > m: + panic(offsetGTM) + case nb < 0: + panic(nbLT0) + case nb > n: + panic(nbGTN) + case lda < max(1, n): + panic(badLdA) + case ldf < max(1, nb): + panic(badLdF) + } + + if m == 0 || n == 0 { + return 0 + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(jpvt) != n: + panic(badLenJpvt) + case len(vn1) < n: + panic(shortVn1) + case len(vn2) < n: + panic(shortVn2) + } + + if nb == 0 { + return 0 + } + + switch { + case len(tau) < nb: + panic(shortTau) + case len(auxv) < nb: + panic(shortAuxv) + case len(f) < (n-1)*ldf+nb: + panic(shortF) + } + + if offset == m { + return 0 + } + + lastrk := min(m, n+offset) + lsticc := -1 + tol3z := math.Sqrt(dlamchE) + + bi := blas64.Implementation() + + var k, rk int + for ; k < nb && lsticc == -1; k++ { + rk = offset + k + + // Determine kth pivot column and swap if necessary. + p := k + bi.Idamax(n-k, vn1[k:], 1) + if p != k { + bi.Dswap(m, a[p:], lda, a[k:], lda) + bi.Dswap(k, f[p*ldf:], 1, f[k*ldf:], 1) + jpvt[p], jpvt[k] = jpvt[k], jpvt[p] + vn1[p] = vn1[k] + vn2[p] = vn2[k] + } + + // Apply previous Householder reflectors to column K: + // + // A[rk:m, k] = A[rk:m, k] - A[rk:m, 0:k-1]*F[k, 0:k-1]ᵀ. + if k > 0 { + bi.Dgemv(blas.NoTrans, m-rk, k, -1, + a[rk*lda:], lda, + f[k*ldf:], 1, + 1, + a[rk*lda+k:], lda) + } + + // Generate elementary reflector H_k. + if rk < m-1 { + a[rk*lda+k], tau[k] = impl.Dlarfg(m-rk, a[rk*lda+k], a[(rk+1)*lda+k:], lda) + } else { + tau[k] = 0 + } + + akk := a[rk*lda+k] + a[rk*lda+k] = 1 + + // Compute kth column of F: + // + // Compute F[k+1:n, k] = tau[k]*A[rk:m, k+1:n]ᵀ*A[rk:m, k]. + if k < n-1 { + bi.Dgemv(blas.Trans, m-rk, n-k-1, tau[k], + a[rk*lda+k+1:], lda, + a[rk*lda+k:], lda, + 0, + f[(k+1)*ldf+k:], ldf) + } + + // Padding F[0:k, k] with zeros. + for j := 0; j < k; j++ { + f[j*ldf+k] = 0 + } + + // Incremental updating of F: + // + // F[0:n, k] := F[0:n, k] - tau[k]*F[0:n, 0:k-1]*A[rk:m, 0:k-1]ᵀ*A[rk:m,k]. + if k > 0 { + bi.Dgemv(blas.Trans, m-rk, k, -tau[k], + a[rk*lda:], lda, + a[rk*lda+k:], lda, + 0, + auxv, 1) + bi.Dgemv(blas.NoTrans, n, k, 1, + f, ldf, + auxv, 1, + 1, + f[k:], ldf) + } + + // Update the current row of A: + // + // A[rk, k+1:n] = A[rk, k+1:n] - A[rk, 0:k]*F[k+1:n, 0:k]ᵀ. + if k < n-1 { + bi.Dgemv(blas.NoTrans, n-k-1, k+1, -1, + f[(k+1)*ldf:], ldf, + a[rk*lda:], 1, + 1, + a[rk*lda+k+1:], 1) + } + + // Update partial column norms. + if rk < lastrk-1 { + for j := k + 1; j < n; j++ { + if vn1[j] == 0 { + continue + } + + // The following marked lines follow from the + // analysis in Lapack Working Note 176. + r := math.Abs(a[rk*lda+j]) / vn1[j] // * + temp := math.Max(0, 1-r*r) // * + r = vn1[j] / vn2[j] // * + temp2 := temp * r * r // * + if temp2 < tol3z { + // vn2 is used here as a collection of + // indices into vn2 and also a collection + // of column norms. 
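+					// (The overwritten entries form a
+					// linked list: vn2[j] stores the
+					// previously deferred column and
+					// lsticc its head. The list is
+					// unwound in the recomputation
+					// loop after the block update.)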
+ vn2[j] = float64(lsticc) + lsticc = j + } else { + vn1[j] *= math.Sqrt(temp) // * + } + } + } + + a[rk*lda+k] = akk + } + kb = k + rk = offset + kb + + // Apply the block reflector to the rest of the matrix: + // + // A[offset+kb+1:m, kb+1:n] := A[offset+kb+1:m, kb+1:n] - A[offset+kb+1:m, 1:kb]*F[kb+1:n, 1:kb]ᵀ. + if kb < min(n, m-offset) { + bi.Dgemm(blas.NoTrans, blas.Trans, + m-rk, n-kb, kb, -1, + a[rk*lda:], lda, + f[kb*ldf:], ldf, + 1, + a[rk*lda+kb:], lda) + } + + // Recomputation of difficult columns. + for lsticc >= 0 { + itemp := int(vn2[lsticc]) + + // NOTE: The computation of vn1[lsticc] relies on the fact that + // Dnrm2 does not fail on vectors with norm below the value of + // sqrt(dlamchS) + v := bi.Dnrm2(m-rk, a[rk*lda+lsticc:], lda) + vn1[lsticc] = v + vn2[lsticc] = v + + lsticc = itemp + } + + return kb +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr04.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr04.go new file mode 100644 index 0000000000..8e4b266b85 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr04.go @@ -0,0 +1,493 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" +) + +// Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix +// H, and optionally the matrices T and Z from the Schur decomposition +// +// H = Z T Zᵀ +// +// where T is an upper quasi-triangular matrix (the Schur form), and Z is the +// orthogonal matrix of Schur vectors. +// +// wantt indicates whether the full Schur form T is required. If wantt is false, +// then only enough of H will be updated to preserve the eigenvalues. +// +// wantz indicates whether the n×n matrix of Schur vectors Z is required. If it +// is true, the orthogonal similarity transformation will be accumulated into +// Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced. +// +// ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that +// +// 0 <= ilo <= ihi < n if n > 0, +// ilo == 0 and ihi == -1 if n == 0, +// +// and the block must be isolated, that is, +// +// ilo == 0 or H[ilo,ilo-1] == 0, +// ihi == n-1 or H[ihi+1,ihi] == 0, +// +// otherwise Dlaqr04 will panic. +// +// wr and wi must have length ihi+1. +// +// iloz and ihiz specify the rows of Z to which transformations will be applied +// if wantz is true. It must hold that +// +// 0 <= iloz <= ilo, and ihi <= ihiz < n, +// +// otherwise Dlaqr04 will panic. +// +// work must have length at least lwork and lwork must be +// +// lwork >= 1 if n <= 11, +// lwork >= n if n > 11, +// +// otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for +// optimal performance. On return, work[0] will contain the optimal value of +// lwork. +// +// If lwork is -1, instead of performing Dlaqr04, the function only estimates the +// optimal workspace size and stores it into work[0]. Neither h nor z are +// accessed. +// +// recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves +// as DLAQR0, for recur == 0 it behaves as DLAQR4. +// +// unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1]. +// +// If unconverged is zero and wantt is true, H will contain on return the upper +// quasi-triangular matrix T from the Schur decomposition. 
2×2 diagonal blocks +// (corresponding to complex conjugate pairs of eigenvalues) will be returned in +// standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0. +// +// If unconverged is zero and if wantt is false, the contents of h on return is +// unspecified. +// +// If unconverged is zero, all the eigenvalues have been computed and their real +// and imaginary parts will be stored on return in wr[ilo:ihi+1] and +// wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex +// conjugate pair, they are stored in consecutive elements of wr and wi, say the +// i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the +// eigenvalues are stored in the same order as on the diagonal of the Schur form +// returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal +// block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i]. +// +// If unconverged is positive, some eigenvalues have not converged, and +// wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those +// eigenvalues which have been successfully computed. Failures are rare. +// +// If unconverged is positive and wantt is true, then on return +// +// (initial H)*U = U*(final H), (*) +// +// where U is an orthogonal matrix. The final H is upper Hessenberg and +// H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular. +// +// If unconverged is positive and wantt is false, on return the remaining +// unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix +// H[ilo:unconverged,ilo:unconverged]. +// +// If unconverged is positive and wantz is true, then on return +// +// (final Z) = (initial Z)*U, +// +// where U is the orthogonal matrix in (*) regardless of the value of wantt. +// +// References: +// +// [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I: +// Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix +// Anal. Appl. 23(4) (2002), pp. 929—947 +// URL: http://dx.doi.org/10.1137/S0895479801384573 +// [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II: +// Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973 +// URL: http://dx.doi.org/10.1137/S0895479801384585 +// +// Dlaqr04 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) { + const ( + // Matrices of order ntiny or smaller must be processed by + // Dlahqr because of insufficient subdiagonal scratch space. + // This is a hard limit. + ntiny = 15 + // Exceptional deflation windows: try to cure rare slow + // convergence by varying the size of the deflation window after + // kexnw iterations. + kexnw = 5 + // Exceptional shifts: try to cure rare slow convergence with + // ad-hoc exceptional shifts every kexsh iterations. + kexsh = 6 + + // See https://github.com/gonum/lapack/pull/151#discussion_r68162802 + // and the surrounding discussion for an explanation where these + // constants come from. + // TODO(vladimir-ch): Similar constants for exceptional shifts + // are used also in dlahqr.go. The first constant is different + // there, it is equal to 3. Why? And does it matter? 
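+		// wilk1 and wilk2 parametrize the ad-hoc exceptional shifts
+		// applied in the main loop below when ndfl%kexsh == 0.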
+ wilk1 = 0.75 + wilk2 = -0.4375 + ) + + switch { + case n < 0: + panic(nLT0) + case ilo < 0 || max(0, n-1) < ilo: + panic(badIlo) + case ihi < min(ilo, n-1) || n <= ihi: + panic(badIhi) + case ldh < max(1, n): + panic(badLdH) + case wantz && (iloz < 0 || ilo < iloz): + panic(badIloz) + case wantz && (ihiz < ihi || n <= ihiz): + panic(badIhiz) + case ldz < 1, wantz && ldz < n: + panic(badLdZ) + case lwork < 1 && lwork != -1: + panic(badLWork) + // TODO(vladimir-ch): Enable if and when we figure out what the minimum + // necessary lwork value is. Dlaqr04 says that the minimum is n which + // clashes with Dlaqr23's opinion about optimal work when nw <= 2 + // (independent of n). + // case lwork < n && n > ntiny && lwork != -1: + // panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + case recur < 0: + panic(recurLT0) + } + + // Quick return. + if n == 0 { + work[0] = 1 + return 0 + } + + if lwork != -1 { + switch { + case len(h) < (n-1)*ldh+n: + panic(shortH) + case len(wr) != ihi+1: + panic(badLenWr) + case len(wi) != ihi+1: + panic(badLenWi) + case wantz && len(z) < (n-1)*ldz+n: + panic(shortZ) + case ilo > 0 && h[ilo*ldh+ilo-1] != 0: + panic(notIsolated) + case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0: + panic(notIsolated) + } + } + + if n <= ntiny { + // Tiny matrices must use Dlahqr. + if lwork == -1 { + work[0] = 1 + return 0 + } + return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz) + } + + // Use small bulge multi-shift QR with aggressive early deflation on + // larger-than-tiny matrices. + var jbcmpz string + if wantt { + jbcmpz = "S" + } else { + jbcmpz = "E" + } + if wantz { + jbcmpz += "V" + } else { + jbcmpz += "N" + } + + var fname string + if recur > 0 { + fname = "DLAQR0" + } else { + fname = "DLAQR4" + } + // nwr is the recommended deflation window size. n is greater than ntiny, + // so there is enough subdiagonal workspace for nwr >= 2 as required. + // (In fact, there is enough subdiagonal space for nwr >= 4.) + // TODO(vladimir-ch): If there is enough space for nwr >= 4, should we + // use it? + nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork) + nwr = max(2, nwr) + nwr = min(ihi-ilo+1, min((n-1)/3, nwr)) + + // nsr is the recommended number of simultaneous shifts. n is greater than + // ntiny, so there is enough subdiagonal workspace for nsr to be even and + // greater than or equal to two as required. + nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork) + nsr = min(nsr, min((n-3)/6, ihi-ilo)) + nsr = max(2, nsr&^1) + + // Workspace query call to Dlaqr23. + impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz, ihiz, z, ldz, + wr, wi, h, ldh, n, h, ldh, n, h, ldh, work, -1, recur) + // Optimal workspace is max(Dlaqr5, Dlaqr23). + lwkopt := max(3*nsr/2, int(work[0])) + // Quick return in case of workspace query. + if lwork == -1 { + work[0] = float64(lwkopt) + return 0 + } + + // Dlahqr/Dlaqr04 crossover point. + nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork) + nmin = max(ntiny, nmin) + + // Nibble determines when to skip a multi-shift QR sweep (Dlaqr5). + nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork) + nibble = max(0, nibble) + + // Computation mode of far-from-diagonal orthogonal updates in Dlaqr5. + kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork) + kacc22 = max(0, min(kacc22, 2)) + + // nwmax is the largest possible deflation window for which there is + // sufficient workspace. 
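+	// (Two limits apply: the scratch space below the subdiagonal of H,
+	// which caps the window at (n-1)/3, and the supplied workspace,
+	// which caps it at lwork/2.)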
+ nwmax := min((n-1)/3, lwork/2) + nw := nwmax // Start with maximum deflation window size. + + // nsmax is the largest number of simultaneous shifts for which there is + // sufficient workspace. + nsmax := min((n-3)/6, 2*lwork/3) &^ 1 + + ndfl := 1 // Number of iterations since last deflation. + ndec := 0 // Deflation window size decrement. + + // Main loop. + var ( + itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1)) + it = 0 + ) + for kbot := ihi; kbot >= ilo; { + if it == itmax { + unconverged = kbot + 1 + break + } + it++ + + // Locate active block. + ktop := ilo + for k := kbot; k >= ilo+1; k-- { + if h[k*ldh+k-1] == 0 { + ktop = k + break + } + } + + // Select deflation window size nw. + // + // Typical Case: + // If possible and advisable, nibble the entire active block. + // If not, use size min(nwr,nwmax) or min(nwr+1,nwmax) + // depending upon which has the smaller corresponding + // subdiagonal entry (a heuristic). + // + // Exceptional Case: + // If there have been no deflations in kexnw or more + // iterations, then vary the deflation window size. At first, + // because larger windows are, in general, more powerful than + // smaller ones, rapidly increase the window to the maximum + // possible. Then, gradually reduce the window size. + nh := kbot - ktop + 1 + nwupbd := min(nh, nwmax) + if ndfl < kexnw { + nw = min(nwupbd, nwr) + } else { + nw = min(nwupbd, 2*nw) + } + if nw < nwmax { + if nw >= nh-1 { + nw = nh + } else { + kwtop := kbot - nw + 1 + if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) { + nw++ + } + } + } + if ndfl < kexnw { + ndec = -1 + } else if ndec >= 0 || nw >= nwupbd { + ndec++ + if nw-ndec < 2 { + ndec = 0 + } + nw -= ndec + } + + // Split workspace under the subdiagonal of H into: + // - an nw×nw work array V in the lower left-hand corner, + // - an nw×nhv horizontal work array along the bottom edge (nhv + // must be at least nw but more is better), + // - an nve×nw vertical work array along the left-hand-edge + // (nhv can be any positive integer but more is better). + kv := n - nw + kt := nw + kwv := nw + 1 + nhv := n - kwv - kt + // Aggressive early deflation. + ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, + h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1], + h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur) + + // Adjust kbot accounting for new deflations. + kbot -= ld + // ks points to the shifts. + ks := kbot - ls + 1 + + // Skip an expensive QR sweep if there is a (partly heuristic) + // reason to expect that many eigenvalues will deflate without + // it. Here, the QR sweep is skipped if many eigenvalues have + // just been deflated or if the remaining active block is small. + if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) { + // ld is positive, note progress. + ndfl = 1 + continue + } + + // ns is the nominal number of simultaneous shifts. This may be + // lowered (slightly) if Dlaqr23 did not provide that many + // shifts. + ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1 + + // If there have been no deflations in a multiple of kexsh + // iterations, then try exceptional shifts. Otherwise use shifts + // provided by Dlaqr23 above or from the eigenvalues of a + // trailing principal submatrix. 
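+ // ndfl is reset to 1 whenever a deflation occurs, so the test
+ // below fires on every kexsh-th consecutive iteration without
+ // progress.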
+ if ndfl%kexsh == 0 { + ks = kbot - ns + 1 + for i := kbot; i > max(ks, ktop+1); i -= 2 { + ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2]) + aa := wilk1*ss + h[i*ldh+i] + _, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ = + impl.Dlanv2(aa, ss, wilk2*ss, aa) + } + if ks == ktop { + wr[ks+1] = h[(ks+1)*ldh+ks+1] + wi[ks+1] = 0 + wr[ks] = wr[ks+1] + wi[ks] = wi[ks+1] + } + } else { + // If we got ns/2 or fewer shifts, use Dlahqr or recur + // into Dlaqr04 on a trailing principal submatrix to get + // more. Since ns <= nsmax <=(n+6)/9, there is enough + // space below the subdiagonal to fit an ns×ns scratch + // array. + if kbot-ks+1 <= ns/2 { + ks = kbot - ns + 1 + kt = n - ns + impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh) + if ns > nmin && recur > 0 { + ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh, + wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1) + } else { + ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh, + wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 1) + } + // In case of a rare QR failure use eigenvalues + // of the trailing 2×2 principal submatrix. + if ks >= kbot { + aa := h[(kbot-1)*ldh+kbot-1] + bb := h[(kbot-1)*ldh+kbot] + cc := h[kbot*ldh+kbot-1] + dd := h[kbot*ldh+kbot] + _, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ = + impl.Dlanv2(aa, bb, cc, dd) + ks = kbot - 1 + } + } + + if kbot-ks+1 > ns { + // Sorting the shifts helps a little. Bubble + // sort keeps complex conjugate pairs together. + sorted := false + for k := kbot; k > ks; k-- { + if sorted { + break + } + sorted = true + for i := ks; i < k; i++ { + if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) { + continue + } + sorted = false + wr[i], wr[i+1] = wr[i+1], wr[i] + wi[i], wi[i+1] = wi[i+1], wi[i] + } + } + } + + // Shuffle shifts into pairs of real shifts and pairs of + // complex conjugate shifts using the fact that complex + // conjugate shifts are already adjacent to one another. + // TODO(vladimir-ch): The shuffling here could probably + // be removed but I'm not sure right now and it's safer + // to leave it. + for i := kbot; i > ks+1; i -= 2 { + if wi[i] == -wi[i-1] { + continue + } + wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i] + wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i] + } + } + + // If there are only two shifts and both are real, then use only one. + if kbot-ks+1 == 2 && wi[kbot] == 0 { + if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) { + wr[kbot-1] = wr[kbot] + } else { + wr[kbot] = wr[kbot-1] + } + } + + // Use up to ns of the smallest magnitude shifts. If there + // aren't ns shifts available, then use them all, possibly + // dropping one to make the number of shifts even. + ns = min(ns, kbot-ks+1) &^ 1 + ks = kbot - ns + 1 + + // Split workspace under the subdiagonal into: + // - a kdu×kdu work array U in the lower left-hand-corner, + // - a kdu×nhv horizontal work array WH along the bottom edge + // (nhv must be at least kdu but more is better), + // - an nhv×kdu vertical work array WV along the left-hand-edge + // (nhv must be at least kdu but more is better). + kdu := 2 * ns + ku := n - kdu + kwh := kdu + kwv = kdu + 3 + nhv = n - kwv - kdu + // Small-bulge multi-shift QR sweep. + impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns, + wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz, + work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh) + + // Note progress (or the lack of it). 
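+ // Both the deflation window schedule and the exceptional shift
+ // trigger above key off ndfl.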
+ if ld > 0 { + ndfl = 1 + } else { + ndfl++ + } + } + + work[0] = float64(lwkopt) + return unconverged +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr1.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr1.go new file mode 100644 index 0000000000..c20c88fdb4 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr1.go @@ -0,0 +1,61 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlaqr1 sets v to a scalar multiple of the first column of the product +// +// (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) +// +// where H is a 2×2 or 3×3 matrix, I is the identity matrix of the same size, +// and i is the imaginary unit. Scaling is done to avoid overflows and most +// underflows. +// +// n is the order of H and must be either 2 or 3. It must hold that either sr1 = +// sr2 and si1 = -si2, or si1 = si2 = 0. The length of v must be equal to n. If +// any of these conditions is not met, Dlaqr1 will panic. +// +// Dlaqr1 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlaqr1(n int, h []float64, ldh int, sr1, si1, sr2, si2 float64, v []float64) { + switch { + case n != 2 && n != 3: + panic("lapack: n must be 2 or 3") + case ldh < n: + panic(badLdH) + case len(h) < (n-1)*ldh+n: + panic(shortH) + case !((sr1 == sr2 && si1 == -si2) || (si1 == 0 && si2 == 0)): + panic(badShifts) + case len(v) != n: + panic(shortV) + } + + if n == 2 { + s := math.Abs(h[0]-sr2) + math.Abs(si2) + math.Abs(h[ldh]) + if s == 0 { + v[0] = 0 + v[1] = 0 + } else { + h21s := h[ldh] / s + v[0] = h21s*h[1] + (h[0]-sr1)*((h[0]-sr2)/s) - si1*(si2/s) + v[1] = h21s * (h[0] + h[ldh+1] - sr1 - sr2) + } + return + } + + s := math.Abs(h[0]-sr2) + math.Abs(si2) + math.Abs(h[ldh]) + math.Abs(h[2*ldh]) + if s == 0 { + v[0] = 0 + v[1] = 0 + v[2] = 0 + } else { + h21s := h[ldh] / s + h31s := h[2*ldh] / s + v[0] = (h[0]-sr1)*((h[0]-sr2)/s) - si1*(si2/s) + h[1]*h21s + h[2]*h31s + v[1] = h21s*(h[0]+h[ldh+1]-sr1-sr2) + h[ldh+2]*h31s + v[2] = h31s*(h[0]+h[2*ldh+2]-sr1-sr2) + h21s*h[2*ldh+1] + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr23.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr23.go new file mode 100644 index 0000000000..a3fa6661c6 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr23.go @@ -0,0 +1,423 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dlaqr23 performs the orthogonal similarity transformation of an n×n upper +// Hessenberg matrix to detect and deflate fully converged eigenvalues from a +// trailing principal submatrix using aggressive early deflation [1]. +// +// On return, H will be overwritten by a new Hessenberg matrix that is a +// perturbation of an orthogonal similarity transformation of H. It is hoped +// that on output H will have many zero subdiagonal entries. +// +// If wantt is true, the matrix H will be fully updated so that the +// quasi-triangular Schur factor can be computed. If wantt is false, then only +// enough of H will be updated to preserve the eigenvalues. 
+// +// If wantz is true, the orthogonal similarity transformation will be +// accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced. +// +// ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal +// of H. It must hold that +// +// 0 <= ilo <= ihi < n if n > 0, +// ilo == 0 and ihi == -1 if n == 0, +// +// and the block must be isolated, that is, it must hold that +// +// ktop == 0 or H[ktop,ktop-1] == 0, +// kbot == n-1 or H[kbot+1,kbot] == 0, +// +// otherwise Dlaqr23 will panic. +// +// nw is the deflation window size. It must hold that +// +// 0 <= nw <= kbot-ktop+1, +// +// otherwise Dlaqr23 will panic. +// +// iloz and ihiz specify the rows of the n×n matrix Z to which transformations +// will be applied if wantz is true. It must hold that +// +// 0 <= iloz <= ktop, and kbot <= ihiz < n, +// +// otherwise Dlaqr23 will panic. +// +// sr and si must have length kbot+1, otherwise Dlaqr23 will panic. +// +// v and ldv represent an nw×nw work matrix. +// t and ldt represent an nw×nh work matrix, and nh must be at least nw. +// wv and ldwv represent an nv×nw work matrix. +// +// work must have length at least lwork and lwork must be at least max(1,2*nw), +// otherwise Dlaqr23 will panic. Larger values of lwork may result in greater +// efficiency. On return, work[0] will contain the optimal value of lwork. +// +// If lwork is -1, instead of performing Dlaqr23, the function only estimates the +// optimal workspace size and stores it into work[0]. Neither h nor z are +// accessed. +// +// recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves +// as DLAQR3, for recur == 0 it behaves as DLAQR2. +// +// On return, ns and nd will contain respectively the number of unconverged +// (i.e., approximate) eigenvalues and converged eigenvalues that are stored in +// sr and si. +// +// On return, the real and imaginary parts of approximate eigenvalues that may +// be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1] +// and si[kbot-nd-ns+1:kbot-nd+1]. +// +// On return, the real and imaginary parts of converged eigenvalues will be +// stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1]. +// +// References: +// +// [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II: +// Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973 +// URL: http://dx.doi.org/10.1137/S0895479801384585 +func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) { + switch { + case n < 0: + panic(nLT0) + case ktop < 0 || max(0, n-1) < ktop: + panic(badKtop) + case kbot < min(ktop, n-1) || n <= kbot: + panic(badKbot) + case nw < 0 || kbot-ktop+1+1 < nw: + panic(badNw) + case ldh < max(1, n): + panic(badLdH) + case wantz && (iloz < 0 || ktop < iloz): + panic(badIloz) + case wantz && (ihiz < kbot || n <= ihiz): + panic(badIhiz) + case ldz < 1, wantz && ldz < n: + panic(badLdZ) + case ldv < max(1, nw): + panic(badLdV) + case nh < nw: + panic(badNh) + case ldt < max(1, nh): + panic(badLdT) + case nv < 0: + panic(nvLT0) + case ldwv < max(1, nw): + panic(badLdWV) + case lwork < max(1, 2*nw) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + case recur < 0: + panic(recurLT0) + } + + // Quick return for zero window size. 
+ if nw == 0 { + work[0] = 1 + return 0, 0 + } + + // LAPACK code does not enforce the documented behavior + // nw <= kbot-ktop+1 + // but we do (we panic above). + jw := nw + lwkopt := max(1, 2*nw) + if jw > 2 { + // Workspace query call to Dgehrd. + impl.Dgehrd(jw, 0, jw-2, t, ldt, work, work, -1) + lwk1 := int(work[0]) + // Workspace query call to Dormhr. + impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1) + lwk2 := int(work[0]) + if recur > 0 { + // Workspace query call to Dlaqr04. + impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr, si, 0, jw-1, v, ldv, work, -1, recur-1) + lwk3 := int(work[0]) + // Optimal workspace. + lwkopt = max(jw+max(lwk1, lwk2), lwk3) + } else { + // Optimal workspace. + lwkopt = jw + max(lwk1, lwk2) + } + } + // Quick return in case of workspace query. + if lwork == -1 { + work[0] = float64(lwkopt) + return 0, 0 + } + + // Check input slices only if not doing workspace query. + switch { + case len(h) < (n-1)*ldh+n: + panic(shortH) + case len(v) < (nw-1)*ldv+nw: + panic(shortV) + case len(t) < (nw-1)*ldt+nh: + panic(shortT) + case len(wv) < (nv-1)*ldwv+nw: + panic(shortWV) + case wantz && len(z) < (n-1)*ldz+n: + panic(shortZ) + case len(sr) != kbot+1: + panic(badLenSr) + case len(si) != kbot+1: + panic(badLenSi) + case ktop > 0 && h[ktop*ldh+ktop-1] != 0: + panic(notIsolated) + case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0: + panic(notIsolated) + } + + // Machine constants. + ulp := dlamchP + smlnum := float64(n) / ulp * dlamchS + + // Setup deflation window. + var s float64 + kwtop := kbot - jw + 1 + if kwtop != ktop { + s = h[kwtop*ldh+kwtop-1] + } + if kwtop == kbot { + // 1×1 deflation window. + sr[kwtop] = h[kwtop*ldh+kwtop] + si[kwtop] = 0 + ns = 1 + nd = 0 + if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) { + ns = 0 + nd = 1 + if kwtop > ktop { + h[kwtop*ldh+kwtop-1] = 0 + } + } + work[0] = 1 + return ns, nd + } + + // Convert to spike-triangular form. In case of a rare QR failure, this + // routine continues to do aggressive early deflation using that part of + // the deflation window that converged using infqr here and there to + // keep track. + impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt) + bi := blas64.Implementation() + bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1) + impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv) + nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork) + var infqr int + if recur > 0 && jw > nmin { + infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1) + } else { + infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv) + } + // Note that ilo == 0 which conveniently coincides with the success + // value of infqr, that is, infqr as an index always points to the first + // converged eigenvalue. + + // Dtrexc needs a clean margin near the diagonal. + for j := 0; j < jw-3; j++ { + t[(j+2)*ldt+j] = 0 + t[(j+3)*ldt+j] = 0 + } + if jw >= 3 { + t[(jw-1)*ldt+jw-3] = 0 + } + + ns = jw + ilst := infqr + // Deflation detection loop. + for ilst < ns { + bulge := false + if ns >= 2 { + bulge = t[(ns-1)*ldt+ns-2] != 0 + } + if !bulge { + // Real eigenvalue. + abst := math.Abs(t[(ns-1)*ldt+ns-1]) + if abst == 0 { + abst = math.Abs(s) + } + if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) { + // Deflatable. + ns-- + } else { + // Undeflatable, move it up out of the way. + // Dtrexc can not fail in this case. 
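+ // (The test above compares the spike entry s*v[ns-1], which
+ // couples this eigenvalue to the rest of H, against the local
+ // scale abst.)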
+ _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work) + ilst++ + } + continue + } + // Complex conjugate pair. + abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1])) + if abst == 0 { + abst = math.Abs(s) + } + if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) { + // Deflatable. + ns -= 2 + } else { + // Undeflatable, move them up out of the way. + // Dtrexc does the right thing with ilst in case of a + // rare exchange failure. + _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work) + ilst += 2 + } + } + + // Return to Hessenberg form. + if ns == 0 { + s = 0 + } + if ns < jw { + // Sorting diagonal blocks of T improves accuracy for graded + // matrices. Bubble sort deals well with exchange failures. + sorted := false + i := ns + for !sorted { + sorted = true + kend := i - 1 + i = infqr + var k int + if i == ns-1 || t[(i+1)*ldt+i] == 0 { + k = i + 1 + } else { + k = i + 2 + } + for k <= kend { + var evi float64 + if k == i+1 { + evi = math.Abs(t[i*ldt+i]) + } else { + evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1])) + } + + var evk float64 + if k == kend || t[(k+1)*ldt+k] == 0 { + evk = math.Abs(t[k*ldt+k]) + } else { + evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1])) + } + + if evi >= evk { + i = k + } else { + sorted = false + _, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work) + if ok { + i = ilst + } else { + i = k + } + } + if i == kend || t[(i+1)*ldt+i] == 0 { + k = i + 1 + } else { + k = i + 2 + } + } + } + } + + // Restore shift/eigenvalue array from T. + for i := jw - 1; i >= infqr; { + if i == infqr || t[i*ldt+i-1] == 0 { + sr[kwtop+i] = t[i*ldt+i] + si[kwtop+i] = 0 + i-- + continue + } + aa := t[(i-1)*ldt+i-1] + bb := t[(i-1)*ldt+i] + cc := t[i*ldt+i-1] + dd := t[i*ldt+i] + _, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd) + i -= 2 + } + + if ns < jw || s == 0 { + if ns > 1 && s != 0 { + // Reflect spike back into lower triangle. + bi.Dcopy(ns, v[:ns], 1, work[:ns], 1) + _, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1) + work[0] = 1 + impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt) + impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:]) + impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:]) + impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:]) + impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw) + } + + // Copy updated reduced window into place. + if kwtop > 0 { + h[kwtop*ldh+kwtop-1] = s * v[0] + } + impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh) + bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1) + + // Accumulate orthogonal matrix in order to update H and Z, if + // requested. + if ns > 1 && s != 0 { + // work[:ns-1] contains the elementary reflectors stored + // by a call to Dgehrd above. + impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1, + t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw) + } + + // Update vertical slab in H. 
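+ // Only rows above the deflation window need updating from the
+ // right; when wantt is false the update can start at ktop instead
+ // of row 0.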
+ var ltop int + if !wantt { + ltop = ktop + } + for krow := ltop; krow < kwtop; krow += nv { + kln := min(nv, kwtop-krow) + bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw, + 1, h[krow*ldh+kwtop:], ldh, v, ldv, + 0, wv, ldwv) + impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh) + } + + // Update horizontal slab in H. + if wantt { + for kcol := kbot + 1; kcol < n; kcol += nh { + kln := min(nh, n-kcol) + bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw, + 1, v, ldv, h[kwtop*ldh+kcol:], ldh, + 0, t, ldt) + impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh) + } + } + + // Update vertical slab in Z. + if wantz { + for krow := iloz; krow <= ihiz; krow += nv { + kln := min(nv, ihiz-krow+1) + bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw, + 1, z[krow*ldz+kwtop:], ldz, v, ldv, + 0, wv, ldwv) + impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz) + } + } + } + + // The number of deflations. + nd = jw - ns + // Shifts are converged eigenvalues that could not be deflated. + // Subtracting infqr from the spike length takes care of the case of a + // rare QR failure while calculating eigenvalues of the deflation + // window. + ns -= infqr + work[0] = float64(lwkopt) + return ns, nd +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr5.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr5.go new file mode 100644 index 0000000000..443a53d5c4 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr5.go @@ -0,0 +1,560 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlaqr5 performs a single small-bulge multi-shift QR sweep on an isolated +// block of a Hessenberg matrix. +// +// wantt and wantz determine whether the quasi-triangular Schur factor and the +// orthogonal Schur factor, respectively, will be computed. +// +// kacc22 specifies the computation mode of far-from-diagonal orthogonal +// updates. Permitted values are: +// +// 0: Dlaqr5 will not accumulate reflections and will not use matrix-matrix +// multiply to update far-from-diagonal matrix entries. +// 1: Dlaqr5 will accumulate reflections and use matrix-matrix multiply to +// update far-from-diagonal matrix entries. +// 2: Same as kacc22=1. This option used to enable exploiting the 2×2 structure +// during matrix multiplications, but this is no longer supported. +// +// For other values of kacc2 Dlaqr5 will panic. +// +// n is the order of the Hessenberg matrix H. +// +// ktop and kbot are indices of the first and last row and column of an isolated +// diagonal block upon which the QR sweep will be applied. It must hold that +// +// ktop == 0, or 0 < ktop <= n-1 and H[ktop, ktop-1] == 0, and +// kbot == n-1, or 0 <= kbot < n-1 and H[kbot+1, kbot] == 0, +// +// otherwise Dlaqr5 will panic. +// +// nshfts is the number of simultaneous shifts. It must be positive and even, +// otherwise Dlaqr5 will panic. +// +// sr and si contain the real and imaginary parts, respectively, of the shifts +// of origin that define the multi-shift QR sweep. On return both slices may be +// reordered by Dlaqr5. Their length must be equal to nshfts, otherwise Dlaqr5 +// will panic. +// +// h and ldh represent the Hessenberg matrix H of size n×n. 
On return +// multi-shift QR sweep with shifts sr+i*si has been applied to the isolated +// diagonal block in rows and columns ktop through kbot, inclusive. +// +// iloz and ihiz specify the rows of Z to which transformations will be applied +// if wantz is true. It must hold that 0 <= iloz <= ihiz < n, otherwise Dlaqr5 +// will panic. +// +// z and ldz represent the matrix Z of size n×n. If wantz is true, the QR sweep +// orthogonal similarity transformation is accumulated into +// z[iloz:ihiz,iloz:ihiz] from the right, otherwise z not referenced. +// +// v and ldv represent an auxiliary matrix V of size (nshfts/2)×3. Note that V +// is transposed with respect to the reference netlib implementation. +// +// u and ldu represent an auxiliary matrix of size (2*nshfts)×(2*nshfts). +// +// wh and ldwh represent an auxiliary matrix of size (2*nshfts-1)×nh. +// +// wv and ldwv represent an auxiliary matrix of size nv×(2*nshfts-1). +// +// Dlaqr5 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlaqr5(wantt, wantz bool, kacc22 int, n, ktop, kbot, nshfts int, sr, si []float64, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, v []float64, ldv int, u []float64, ldu int, nv int, wv []float64, ldwv int, nh int, wh []float64, ldwh int) { + switch { + case kacc22 != 0 && kacc22 != 1 && kacc22 != 2: + panic(badKacc22) + case n < 0: + panic(nLT0) + case ktop < 0 || n <= ktop: + panic(badKtop) + case kbot < 0 || n <= kbot: + panic(badKbot) + + case nshfts < 0: + panic(nshftsLT0) + case nshfts&0x1 != 0: + panic(nshftsOdd) + case len(sr) != nshfts: + panic(badLenSr) + case len(si) != nshfts: + panic(badLenSi) + + case ldh < max(1, n): + panic(badLdH) + case len(h) < (n-1)*ldh+n: + panic(shortH) + + case wantz && ihiz >= n: + panic(badIhiz) + case wantz && iloz < 0 || ihiz < iloz: + panic(badIloz) + case ldz < 1, wantz && ldz < n: + panic(badLdZ) + case wantz && len(z) < (n-1)*ldz+n: + panic(shortZ) + + case ldv < 3: + // V is transposed w.r.t. reference lapack. + panic(badLdV) + case len(v) < (nshfts/2-1)*ldv+3: + panic(shortV) + + case ldu < max(1, 2*nshfts): + panic(badLdU) + case len(u) < (2*nshfts-1)*ldu+2*nshfts: + panic(shortU) + + case nv < 0: + panic(nvLT0) + case ldwv < max(1, 2*nshfts): + panic(badLdWV) + case len(wv) < (nv-1)*ldwv+2*nshfts: + panic(shortWV) + + case nh < 0: + panic(nhLT0) + case ldwh < max(1, nh): + panic(badLdWH) + case len(wh) < (2*nshfts-1)*ldwh+nh: + panic(shortWH) + + case ktop > 0 && h[ktop*ldh+ktop-1] != 0: + panic(notIsolated) + case kbot < n-1 && h[(kbot+1)*ldh+kbot] != 0: + panic(notIsolated) + } + + // If there are no shifts, then there is nothing to do. + if nshfts < 2 { + return + } + // If the active block is empty or 1×1, then there is nothing to do. + if ktop >= kbot { + return + } + + // Shuffle shifts into pairs of real shifts and pairs of complex + // conjugate shifts assuming complex conjugate shifts are already + // adjacent to one another. + for i := 0; i < nshfts-2; i += 2 { + if si[i] == -si[i+1] { + continue + } + sr[i], sr[i+1], sr[i+2] = sr[i+1], sr[i+2], sr[i] + si[i], si[i+1], si[i+2] = si[i+1], si[i+2], si[i] + } + + // Note: lapack says that nshfts must be even but allows it to be odd + // anyway. We panic above if nshfts is not even, so reducing it by one + // is unnecessary. The only caller Dlaqr04 uses only even nshfts. + // + // The original comment and code from lapack-3.6.0/SRC/dlaqr5.f:341: + // * ==== NSHFTS is supposed to be even, but if it is odd, + // * . then simply reduce it by one. 
The shuffle above + // * . ensures that the dropped shift is real and that + // * . the remaining shifts are paired. ==== + // * + // NS = NSHFTS - MOD( NSHFTS, 2 ) + ns := nshfts + + safmin := dlamchS + ulp := dlamchP + smlnum := safmin * float64(n) / ulp + + // Use accumulated reflections to update far-from-diagonal entries? + accum := kacc22 == 1 || kacc22 == 2 + + // Clear trash. + if ktop+2 <= kbot { + h[(ktop+2)*ldh+ktop] = 0 + } + + // nbmps = number of 2-shift bulges in the chain. + nbmps := ns / 2 + + // kdu = width of slab. + kdu := 4 * nbmps + + // Create and chase chains of nbmps bulges. + for incol := ktop - 2*nbmps + 1; incol <= kbot-2; incol += 2 * nbmps { + // jtop is an index from which updates from the right start. + var jtop int + switch { + case accum: + jtop = max(ktop, incol) + case wantt: + default: + jtop = ktop + } + ndcol := incol + kdu + if accum { + impl.Dlaset(blas.All, kdu, kdu, 0, 1, u, ldu) + } + // Near-the-diagonal bulge chase. The following loop performs + // the near-the-diagonal part of a small bulge multi-shift QR + // sweep. Each 4*nbmps column diagonal chunk extends from + // column incol to column ndcol (including both column incol and + // column ndcol). The following loop chases a 2*nbmps+1 column + // long chain of nbmps bulges 2*nbmps columns to the right. + // (incol may be less than ktop and ndcol may be greater than + // kbot indicating phantom columns from which to chase bulges + // before they are actually introduced or to which to chase + // bulges beyond column kbot.) + for krcol := incol; krcol <= min(incol+2*nbmps-1, kbot-2); krcol++ { + // Bulges number mtop to mbot are active double implicit + // shift bulges. There may or may not also be small 2×2 + // bulge, if there is room. The inactive bulges (if any) + // must wait until the active bulges have moved down the + // diagonal to make room. The phantom matrix paradigm + // described above helps keep track. + mtop := max(0, (ktop-krcol)/2) + mbot := min(nbmps, (kbot-krcol-1)/2) - 1 + m22 := mbot + 1 + bmp22 := (mbot < nbmps-1) && (krcol+2*m22 == kbot-2) + // Generate reflections to chase the chain right one column. + // The minimum value of k is ktop-1. + if bmp22 { + // Special case: 2×2 reflection at bottom treated separately. + k := krcol + 2*m22 + if k == ktop-1 { + impl.Dlaqr1(2, h[(k+1)*ldh+k+1:], ldh, + sr[2*m22], si[2*m22], sr[2*m22+1], si[2*m22+1], + v[m22*ldv:m22*ldv+2]) + beta := v[m22*ldv] + _, v[m22*ldv] = impl.Dlarfg(2, beta, v[m22*ldv+1:m22*ldv+2], 1) + } else { + beta := h[(k+1)*ldh+k] + v[m22*ldv+1] = h[(k+2)*ldh+k] + beta, v[m22*ldv] = impl.Dlarfg(2, beta, v[m22*ldv+1:m22*ldv+2], 1) + h[(k+1)*ldh+k] = beta + h[(k+2)*ldh+k] = 0 + } + // Perform update from right within computational window. + t1 := v[m22*ldv] + t2 := t1 * v[m22*ldv+1] + for j := jtop; j <= min(kbot, k+3); j++ { + refsum := h[j*ldh+k+1] + v[m22*ldv+1]*h[j*ldh+k+2] + h[j*ldh+k+1] -= refsum * t1 + h[j*ldh+k+2] -= refsum * t2 + } + // Perform update from left within computational window. + var jbot int + switch { + case accum: + jbot = min(ndcol, kbot) + case wantt: + jbot = n - 1 + default: + jbot = kbot + } + t1 = v[m22*ldv] + t2 = t1 * v[m22*ldv+1] + for j := k + 1; j <= jbot; j++ { + refsum := h[(k+1)*ldh+j] + v[m22*ldv+1]*h[(k+2)*ldh+j] + h[(k+1)*ldh+j] -= refsum * t1 + h[(k+2)*ldh+j] -= refsum * t2 + } + // The following convergence test requires that the traditional + // small-compared-to-nearby-diagonals criterion and the Ahues & + // Tisseur (LAWN 122, 1997) criteria both be satisfied. 
The latter + // improves accuracy in some examples. Falling back on an alternate + // convergence criterion when tst1 or tst2 is zero (as done here) is + // traditional but probably unnecessary. + if k >= ktop && h[(k+1)*ldh+k] != 0 { + tst1 := math.Abs(h[k*ldh+k]) + math.Abs(h[(k+1)*ldh+k+1]) + if tst1 == 0 { + if k >= ktop+1 { + tst1 += math.Abs(h[k*ldh+k-1]) + } + if k >= ktop+2 { + tst1 += math.Abs(h[k*ldh+k-2]) + } + if k >= ktop+3 { + tst1 += math.Abs(h[k*ldh+k-3]) + } + if k <= kbot-2 { + tst1 += math.Abs(h[(k+2)*ldh+k+1]) + } + if k <= kbot-3 { + tst1 += math.Abs(h[(k+3)*ldh+k+1]) + } + if k <= kbot-4 { + tst1 += math.Abs(h[(k+4)*ldh+k+1]) + } + } + if math.Abs(h[(k+1)*ldh+k]) <= math.Max(smlnum, ulp*tst1) { + h12 := math.Max(math.Abs(h[(k+1)*ldh+k]), math.Abs(h[k*ldh+k+1])) + h21 := math.Min(math.Abs(h[(k+1)*ldh+k]), math.Abs(h[k*ldh+k+1])) + h11 := math.Max(math.Abs(h[(k+1)*ldh+k+1]), math.Abs(h[k*ldh+k]-h[(k+1)*ldh+k+1])) + h22 := math.Min(math.Abs(h[(k+1)*ldh+k+1]), math.Abs(h[k*ldh+k]-h[(k+1)*ldh+k+1])) + scl := h11 + h12 + tst2 := h22 * (h11 / scl) + if tst2 == 0 || h21*(h12/scl) <= math.Max(smlnum, ulp*tst2) { + h[(k+1)*ldh+k] = 0 + } + } + } + // Accumulate orthogonal transformations. + if accum { + kms := k - incol - 1 + t1 = v[m22*ldv] + t2 = t1 * v[m22*ldv+1] + for j := max(0, ktop-incol-1); j < kdu; j++ { + refsum := u[j*ldu+kms+1] + v[m22*ldv+1]*u[j*ldu+kms+2] + u[j*ldu+kms+1] -= refsum * t1 + u[j*ldu+kms+2] -= refsum * t2 + } + } else if wantz { + t1 = v[m22*ldv] + t2 = t1 * v[m22*ldv+1] + for j := iloz; j <= ihiz; j++ { + refsum := z[j*ldz+k+1] + v[m22*ldv+1]*z[j*ldz+k+2] + z[j*ldz+k+1] -= refsum * t1 + z[j*ldz+k+2] -= refsum * t2 + } + } + } + // Normal case: Chain of 3×3 reflections. + for m := mbot; m >= mtop; m-- { + k := krcol + 2*m + if k == ktop-1 { + impl.Dlaqr1(3, h[ktop*ldh+ktop:], ldh, + sr[2*m], si[2*m], sr[2*m+1], si[2*m+1], + v[m*ldv:m*ldv+3]) + alpha := v[m*ldv] + _, v[m*ldv] = impl.Dlarfg(3, alpha, v[m*ldv+1:m*ldv+3], 1) + } else { + // Perform delayed transformation of row below m-th bulge. + // Exploit fact that first two elements of row are actually + // zero. + t1 := v[m*ldv] + t2 := t1 * v[m*ldv+1] + t3 := t1 * v[m*ldv+2] + refsum := v[m*ldv+2] * h[(k+3)*ldh+k+2] + h[(k+3)*ldh+k] = -refsum * t1 + h[(k+3)*ldh+k+1] = -refsum * t2 + h[(k+3)*ldh+k+2] -= refsum * t3 + // Calculate reflection to move m-th bulge one step. + beta := h[(k+1)*ldh+k] + v[m*ldv+1] = h[(k+2)*ldh+k] + v[m*ldv+2] = h[(k+3)*ldh+k] + beta, v[m*ldv] = impl.Dlarfg(3, beta, v[m*ldv+1:m*ldv+3], 1) + // A bulge may collapse because of vigilant deflation or + // destructive underflow. In the underflow case, try the + // two-small-subdiagonals trick to try to reinflate the + // bulge. + if h[(k+3)*ldh+k] != 0 || h[(k+3)*ldh+k+1] != 0 || h[(k+3)*ldh+k+2] == 0 { + // Typical case: not collapsed (yet). + h[(k+1)*ldh+k] = beta + h[(k+2)*ldh+k] = 0 + h[(k+3)*ldh+k] = 0 + } else { + // Atypical case: collapsed. Attempt to reintroduce + // ignoring H[k+1,k] and H[k+2,k]. If the fill resulting + // from the new reflector is too large, then abandon it. + // Otherwise, use the new one. 
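+ // Measure the fill that the replacement reflector would
+ // leave in column k against the local diagonal scale dsum
+ // before committing to it.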
+ var vt [3]float64 + impl.Dlaqr1(3, h[(k+1)*ldh+k+1:], ldh, + sr[2*m], si[2*m], sr[2*m+1], si[2*m+1], + vt[:]) + _, vt[0] = impl.Dlarfg(3, vt[0], vt[1:3], 1) + t1 = vt[0] + t2 = t1 * vt[1] + t3 = t1 * vt[2] + refsum = h[(k+1)*ldh+k] + vt[1]*h[(k+2)*ldh+k] + dsum := math.Abs(h[k*ldh+k]) + math.Abs(h[(k+1)*ldh+k+1]) + math.Abs(h[(k+2)*ldh+k+2]) + if math.Abs(h[(k+2)*ldh+k]-refsum*t2)+math.Abs(refsum*t3) > ulp*dsum { + // Starting a new bulge here would create + // non-negligible fill. Use the old one with + // trepidation. + h[(k+1)*ldh+k] = beta + h[(k+2)*ldh+k] = 0 + h[(k+3)*ldh+k] = 0 + } else { + // Starting a new bulge here would create only + // negligible fill. Replace the old reflector with + // the new one. + h[(k+1)*ldh+k] -= refsum * t1 + h[(k+2)*ldh+k] = 0 + h[(k+3)*ldh+k] = 0 + v[m*ldv] = vt[0] + v[m*ldv+1] = vt[1] + v[m*ldv+2] = vt[2] + } + } + } + // Apply reflection from the right and the first column of + // update from the left. These updates are required for the + // vigilant deflation check. We still delay most of the updates + // from the left for efficiency. + t1 := v[m*ldv] + t2 := t1 * v[m*ldv+1] + t3 := t1 * v[m*ldv+2] + for j := jtop; j <= min(kbot, k+3); j++ { + refsum := h[j*ldh+k+1] + v[m*ldv+1]*h[j*ldh+k+2] + v[m*ldv+2]*h[j*ldh+k+3] + h[j*ldh+k+1] -= refsum * t1 + h[j*ldh+k+2] -= refsum * t2 + h[j*ldh+k+3] -= refsum * t3 + } + // Perform update from left for subsequent column. + refsum := h[(k+1)*ldh+k+1] + v[m*ldv+1]*h[(k+2)*ldh+k+1] + v[m*ldv+2]*h[(k+3)*ldh+k+1] + h[(k+1)*ldh+k+1] -= refsum * t1 + h[(k+2)*ldh+k+1] -= refsum * t2 + h[(k+3)*ldh+k+1] -= refsum * t3 + // The following convergence test requires that the tradition + // small-compared-to-nearby-diagonals criterion and the Ahues & + // Tisseur (LAWN 122, 1997) criteria both be satisfied. The + // latter improves accuracy in some examples. Falling back on an + // alternate convergence criterion when tst1 or tst2 is zero (as + // done here) is traditional but probably unnecessary. + if k < ktop { + continue + } + if h[(k+1)*ldh+k] != 0 { + tst1 := math.Abs(h[k*ldh+k]) + math.Abs(h[(k+1)*ldh+k+1]) + if tst1 == 0 { + if k >= ktop+1 { + tst1 += math.Abs(h[k*ldh+k-1]) + } + if k >= ktop+2 { + tst1 += math.Abs(h[k*ldh+k-2]) + } + if k >= ktop+3 { + tst1 += math.Abs(h[k*ldh+k-3]) + } + if k <= kbot-2 { + tst1 += math.Abs(h[(k+2)*ldh+k+1]) + } + if k <= kbot-3 { + tst1 += math.Abs(h[(k+3)*ldh+k+1]) + } + if k <= kbot-4 { + tst1 += math.Abs(h[(k+4)*ldh+k+1]) + } + } + if math.Abs(h[(k+1)*ldh+k]) <= math.Max(smlnum, ulp*tst1) { + h12 := math.Max(math.Abs(h[(k+1)*ldh+k]), math.Abs(h[k*ldh+k+1])) + h21 := math.Min(math.Abs(h[(k+1)*ldh+k]), math.Abs(h[k*ldh+k+1])) + h11 := math.Max(math.Abs(h[(k+1)*ldh+k+1]), math.Abs(h[k*ldh+k]-h[(k+1)*ldh+k+1])) + h22 := math.Min(math.Abs(h[(k+1)*ldh+k+1]), math.Abs(h[k*ldh+k]-h[(k+1)*ldh+k+1])) + scl := h11 + h12 + tst2 := h22 * (h11 / scl) + if tst2 == 0 || h21*(h12/scl) <= math.Max(smlnum, ulp*tst2) { + h[(k+1)*ldh+k] = 0 + } + } + } + } + // Multiply H by reflections from the left. 
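+ // The column range of the left updates depends on how
+ // far-from-diagonal entries are handled: up to ndcol when they
+ // are accumulated into U, up to n-1 when the full Schur factor
+ // is wanted, and up to kbot otherwise.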
+ var jbot int + switch { + case accum: + jbot = min(ndcol, kbot) + case wantt: + jbot = n - 1 + default: + jbot = kbot + } + for m := mbot; m >= mtop; m-- { + k := krcol + 2*m + t1 := v[m*ldv] + t2 := t1 * v[m*ldv+1] + t3 := t1 * v[m*ldv+2] + for j := max(ktop, krcol+2*(m+1)); j <= jbot; j++ { + refsum := h[(k+1)*ldh+j] + v[m*ldv+1]*h[(k+2)*ldh+j] + v[m*ldv+2]*h[(k+3)*ldh+j] + h[(k+1)*ldh+j] -= refsum * t1 + h[(k+2)*ldh+j] -= refsum * t2 + h[(k+3)*ldh+j] -= refsum * t3 + } + } + // Accumulate orthogonal transformations. + if accum { + // Accumulate U. If necessary, update Z later with an + // efficient matrix-matrix multiply. + for m := mbot; m >= mtop; m-- { + k := krcol + 2*m + kms := k - incol - 1 + i2 := max(0, ktop-incol-1) + i2 = max(i2, kms-(krcol-incol)) + i4 := min(kdu, krcol+2*mbot-incol+5) + t1 := v[m*ldv] + t2 := t1 * v[m*ldv+1] + t3 := t1 * v[m*ldv+2] + for j := i2; j < i4; j++ { + refsum := u[j*ldu+kms+1] + v[m*ldv+1]*u[j*ldu+kms+2] + v[m*ldv+2]*u[j*ldu+kms+3] + u[j*ldu+kms+1] -= refsum * t1 + u[j*ldu+kms+2] -= refsum * t2 + u[j*ldu+kms+3] -= refsum * t3 + } + } + } else if wantz { + // U is not accumulated, so update Z now by multiplying by + // reflections from the right. + for m := mbot; m >= mtop; m-- { + k := krcol + 2*m + t1 := v[m*ldv] + t2 := t1 * v[m*ldv+1] + t3 := t1 * v[m*ldv+2] + for j := iloz; j <= ihiz; j++ { + refsum := z[j*ldz+k+1] + v[m*ldv+1]*z[j*ldz+k+2] + v[m*ldv+2]*z[j*ldz+k+3] + z[j*ldz+k+1] -= refsum * t1 + z[j*ldz+k+2] -= refsum * t2 + z[j*ldz+k+3] -= refsum * t3 + } + } + } + } + // Use U (if accumulated) to update far-from-diagonal entries in H. + // If required, use U to update Z as well. + if !accum { + continue + } + jtop, jbot := ktop, kbot + if wantt { + jtop = 0 + jbot = n - 1 + } + bi := blas64.Implementation() + k1 := max(0, ktop-incol-1) + nu := kdu - max(0, ndcol-kbot) - k1 + // Horizontal multiply. + for jcol := min(ndcol, kbot) + 1; jcol <= jbot; jcol += nh { + jlen := min(nh, jbot-jcol+1) + bi.Dgemm(blas.Trans, blas.NoTrans, nu, jlen, nu, + 1, u[k1*ldu+k1:], ldu, + h[(incol+k1+1)*ldh+jcol:], ldh, + 0, wh, ldwh) + impl.Dlacpy(blas.All, nu, jlen, wh, ldwh, h[(incol+k1+1)*ldh+jcol:], ldh) + } + // Vertical multiply. + for jrow := jtop; jrow < max(ktop, incol); jrow += nv { + jlen := min(nv, max(ktop, incol)-jrow) + bi.Dgemm(blas.NoTrans, blas.NoTrans, jlen, nu, nu, + 1, h[jrow*ldh+incol+k1+1:], ldh, + u[k1*ldu+k1:], ldu, + 0, wv, ldwv) + impl.Dlacpy(blas.All, jlen, nu, wv, ldwv, h[jrow*ldh+incol+k1+1:], ldh) + } + // Z multiply (also vertical). + if wantz { + for jrow := iloz; jrow <= ihiz; jrow += nv { + jlen := min(nv, ihiz-jrow+1) + bi.Dgemm(blas.NoTrans, blas.NoTrans, jlen, nu, nu, + 1, z[jrow*ldz+incol+k1+1:], ldz, + u[k1*ldu+k1:], ldu, + 0, wv, ldwv) + impl.Dlacpy(blas.All, jlen, nu, wv, ldwv, z[jrow*ldz+incol+k1+1:], ldz) + } + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlarf.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarf.go new file mode 100644 index 0000000000..16581a1b4e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarf.go @@ -0,0 +1,102 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlarf applies an elementary reflector H to an m×n matrix C: +// +// C = H * C if side == blas.Left +// C = C * H if side == blas.Right +// +// H is represented in the form +// +// H = I - tau * v * vᵀ +// +// where tau is a scalar and v is a vector. +// +// work must have length at least m if side == blas.Left and +// at least n if side == blas.Right. +// +// Dlarf is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlarf(side blas.Side, m, n int, v []float64, incv int, tau float64, c []float64, ldc int, work []float64) { + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case incv == 0: + panic(zeroIncV) + case ldc < max(1, n): + panic(badLdC) + } + + if m == 0 || n == 0 { + return + } + + applyleft := side == blas.Left + lenV := n + if applyleft { + lenV = m + } + + switch { + case len(v) < 1+(lenV-1)*abs(incv): + panic(shortV) + case len(c) < (m-1)*ldc+n: + panic(shortC) + case (applyleft && len(work) < n) || (!applyleft && len(work) < m): + panic(shortWork) + } + + lastv := -1 // last non-zero element of v + lastc := -1 // last non-zero row/column of C + if tau != 0 { + if applyleft { + lastv = m - 1 + } else { + lastv = n - 1 + } + var i int + if incv > 0 { + i = lastv * incv + } + // Look for the last non-zero row in v. + for lastv >= 0 && v[i] == 0 { + lastv-- + i -= incv + } + if applyleft { + // Scan for the last non-zero column in C[0:lastv, :] + lastc = impl.Iladlc(lastv+1, n, c, ldc) + } else { + // Scan for the last non-zero row in C[:, 0:lastv] + lastc = impl.Iladlr(m, lastv+1, c, ldc) + } + } + if lastv == -1 || lastc == -1 { + return + } + bi := blas64.Implementation() + if applyleft { + // Form H * C + // w[0:lastc+1] = c[1:lastv+1, 1:lastc+1]ᵀ * v[1:lastv+1,1] + bi.Dgemv(blas.Trans, lastv+1, lastc+1, 1, c, ldc, v, incv, 0, work, 1) + // c[0: lastv, 0: lastc] = c[...] - w[0:lastv, 1] * v[1:lastc, 1]ᵀ + bi.Dger(lastv+1, lastc+1, -tau, v, incv, work, 1, c, ldc) + } else { + // Form C * H + // w[0:lastc+1,1] := c[0:lastc+1,0:lastv+1] * v[0:lastv+1,1] + bi.Dgemv(blas.NoTrans, lastc+1, lastv+1, 1, c, ldc, v, incv, 0, work, 1) + // c[0:lastc+1,0:lastv+1] = c[...] - w[0:lastc+1,0] * v[0:lastv+1,0]ᵀ + bi.Dger(lastc+1, lastv+1, -tau, work, 1, v, incv, c, ldc) + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfb.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfb.go new file mode 100644 index 0000000000..eb43ca74ce --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfb.go @@ -0,0 +1,461 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dlarfb applies a block reflector to a matrix. +// +// In the call to Dlarfb, the mxn c is multiplied by the implicitly defined matrix h as follows: +// +// c = h * c if side == Left and trans == NoTrans +// c = c * h if side == Right and trans == NoTrans +// c = hᵀ * c if side == Left and trans == Trans +// c = c * hᵀ if side == Right and trans == Trans +// +// h is a product of elementary reflectors. direct sets the direction of multiplication +// +// h = h_1 * h_2 * ... * h_k if direct == Forward +// h = h_k * h_k-1 * ... 
* h_1 if direct == Backward +// +// The combination of direct and store defines the orientation of the elementary +// reflectors. In all cases the ones on the diagonal are implicitly represented. +// +// If direct == lapack.Forward and store == lapack.ColumnWise +// +// V = [ 1 ] +// [v1 1 ] +// [v1 v2 1] +// [v1 v2 v3] +// [v1 v2 v3] +// +// If direct == lapack.Forward and store == lapack.RowWise +// +// V = [ 1 v1 v1 v1 v1] +// [ 1 v2 v2 v2] +// [ 1 v3 v3] +// +// If direct == lapack.Backward and store == lapack.ColumnWise +// +// V = [v1 v2 v3] +// [v1 v2 v3] +// [ 1 v2 v3] +// [ 1 v3] +// [ 1] +// +// If direct == lapack.Backward and store == lapack.RowWise +// +// V = [v1 v1 1 ] +// [v2 v2 v2 1 ] +// [v3 v3 v3 v3 1] +// +// An elementary reflector can be explicitly constructed by extracting the +// corresponding elements of v, placing a 1 where the diagonal would be, and +// placing zeros in the remaining elements. +// +// t is a k×k matrix containing the block reflector, and this function will panic +// if t is not of sufficient size. See Dlarft for more information. +// +// work is a temporary storage matrix with stride ldwork. +// work must be of size at least n×k side == Left and m×k if side == Right, and +// this function will panic if this size is not met. +// +// Dlarfb is an internal routine. It is exported for testing purposes. +func (Implementation) Dlarfb(side blas.Side, trans blas.Transpose, direct lapack.Direct, store lapack.StoreV, m, n, k int, v []float64, ldv int, t []float64, ldt int, c []float64, ldc int, work []float64, ldwork int) { + nv := m + if side == blas.Right { + nv = n + } + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case trans != blas.Trans && trans != blas.NoTrans: + panic(badTrans) + case direct != lapack.Forward && direct != lapack.Backward: + panic(badDirect) + case store != lapack.ColumnWise && store != lapack.RowWise: + panic(badStoreV) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case store == lapack.ColumnWise && ldv < max(1, k): + panic(badLdV) + case store == lapack.RowWise && ldv < max(1, nv): + panic(badLdV) + case ldt < max(1, k): + panic(badLdT) + case ldc < max(1, n): + panic(badLdC) + case ldwork < max(1, k): + panic(badLdWork) + } + + if m == 0 || n == 0 { + return + } + + nw := n + if side == blas.Right { + nw = m + } + switch { + case store == lapack.ColumnWise && len(v) < (nv-1)*ldv+k: + panic(shortV) + case store == lapack.RowWise && len(v) < (k-1)*ldv+nv: + panic(shortV) + case len(t) < (k-1)*ldt+k: + panic(shortT) + case len(c) < (m-1)*ldc+n: + panic(shortC) + case len(work) < (nw-1)*ldwork+k: + panic(shortWork) + } + + bi := blas64.Implementation() + + transt := blas.Trans + if trans == blas.Trans { + transt = blas.NoTrans + } + // TODO(btracey): This follows the original Lapack code where the + // elements are copied into the columns of the working array. The + // loops should go in the other direction so the data is written + // into the rows of work so the copy is not strided. A bigger change + // would be to replace work with workᵀ, but benchmarks would be + // needed to see if the change is merited. + if store == lapack.ColumnWise { + if direct == lapack.Forward { + // V1 is the first k rows of C. V2 is the remaining rows. + if side == blas.Left { + // W = Cᵀ V = C1ᵀ V1 + C2ᵀ V2 (stored in work). + + // W = C1. + for j := 0; j < k; j++ { + bi.Dcopy(n, c[j*ldc:], 1, work[j:], ldwork) + } + // W = W * V1. 
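+ // V1 is unit lower triangular, so this is a single
+ // triangular multiply.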
+ bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, + n, k, 1, + v, ldv, + work, ldwork) + if m > k { + // W = W + C2ᵀ V2. + bi.Dgemm(blas.Trans, blas.NoTrans, n, k, m-k, + 1, c[k*ldc:], ldc, v[k*ldv:], ldv, + 1, work, ldwork) + } + // W = W * Tᵀ or W * T. + bi.Dtrmm(blas.Right, blas.Upper, transt, blas.NonUnit, n, k, + 1, t, ldt, + work, ldwork) + // C -= V * Wᵀ. + if m > k { + // C2 -= V2 * Wᵀ. + bi.Dgemm(blas.NoTrans, blas.Trans, m-k, n, k, + -1, v[k*ldv:], ldv, work, ldwork, + 1, c[k*ldc:], ldc) + } + // W *= V1ᵀ. + bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, n, k, + 1, v, ldv, + work, ldwork) + // C1 -= Wᵀ. + // TODO(btracey): This should use blas.Axpy. + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[j*ldc+i] -= work[i*ldwork+j] + } + } + return + } + // Form C = C * H or C * Hᵀ, where C = (C1 C2). + + // W = C1. + for i := 0; i < k; i++ { + bi.Dcopy(m, c[i:], ldc, work[i:], ldwork) + } + // W *= V1. + bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, m, k, + 1, v, ldv, + work, ldwork) + if n > k { + bi.Dgemm(blas.NoTrans, blas.NoTrans, m, k, n-k, + 1, c[k:], ldc, v[k*ldv:], ldv, + 1, work, ldwork) + } + // W *= T or Tᵀ. + bi.Dtrmm(blas.Right, blas.Upper, trans, blas.NonUnit, m, k, + 1, t, ldt, + work, ldwork) + if n > k { + bi.Dgemm(blas.NoTrans, blas.Trans, m, n-k, k, + -1, work, ldwork, v[k*ldv:], ldv, + 1, c[k:], ldc) + } + // C -= W * Vᵀ. + bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, m, k, + 1, v, ldv, + work, ldwork) + // C -= W. + // TODO(btracey): This should use blas.Axpy. + for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i*ldc+j] -= work[i*ldwork+j] + } + } + return + } + // V = (V1) + // = (V2) (last k rows) + // Where V2 is unit upper triangular. + if side == blas.Left { + // Form H * C or + // W = Cᵀ V. + + // W = C2ᵀ. + for j := 0; j < k; j++ { + bi.Dcopy(n, c[(m-k+j)*ldc:], 1, work[j:], ldwork) + } + // W *= V2. + bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, n, k, + 1, v[(m-k)*ldv:], ldv, + work, ldwork) + if m > k { + // W += C1ᵀ * V1. + bi.Dgemm(blas.Trans, blas.NoTrans, n, k, m-k, + 1, c, ldc, v, ldv, + 1, work, ldwork) + } + // W *= T or Tᵀ. + bi.Dtrmm(blas.Right, blas.Lower, transt, blas.NonUnit, n, k, + 1, t, ldt, + work, ldwork) + // C -= V * Wᵀ. + if m > k { + bi.Dgemm(blas.NoTrans, blas.Trans, m-k, n, k, + -1, v, ldv, work, ldwork, + 1, c, ldc) + } + // W *= V2ᵀ. + bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, n, k, + 1, v[(m-k)*ldv:], ldv, + work, ldwork) + // C2 -= Wᵀ. + // TODO(btracey): This should use blas.Axpy. + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[(m-k+j)*ldc+i] -= work[i*ldwork+j] + } + } + return + } + // Form C * H or C * Hᵀ where C = (C1 C2). + // W = C * V. + + // W = C2. + for j := 0; j < k; j++ { + bi.Dcopy(m, c[n-k+j:], ldc, work[j:], ldwork) + } + + // W = W * V2. + bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, m, k, + 1, v[(n-k)*ldv:], ldv, + work, ldwork) + if n > k { + bi.Dgemm(blas.NoTrans, blas.NoTrans, m, k, n-k, + 1, c, ldc, v, ldv, + 1, work, ldwork) + } + // W *= T or Tᵀ. + bi.Dtrmm(blas.Right, blas.Lower, trans, blas.NonUnit, m, k, + 1, t, ldt, + work, ldwork) + // C -= W * Vᵀ. + if n > k { + // C1 -= W * V1ᵀ. + bi.Dgemm(blas.NoTrans, blas.Trans, m, n-k, k, + -1, work, ldwork, v, ldv, + 1, c, ldc) + } + // W *= V2ᵀ. + bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, m, k, + 1, v[(n-k)*ldv:], ldv, + work, ldwork) + // C2 -= W. + // TODO(btracey): This should use blas.Axpy. 
+ for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i*ldc+n-k+j] -= work[i*ldwork+j] + } + } + return + } + // Store = Rowwise. + if direct == lapack.Forward { + // V = (V1 V2) where v1 is unit upper triangular. + if side == blas.Left { + // Form H * C or Hᵀ * C where C = (C1; C2). + // W = Cᵀ * Vᵀ. + + // W = C1ᵀ. + for j := 0; j < k; j++ { + bi.Dcopy(n, c[j*ldc:], 1, work[j:], ldwork) + } + // W *= V1ᵀ. + bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, n, k, + 1, v, ldv, + work, ldwork) + if m > k { + bi.Dgemm(blas.Trans, blas.Trans, n, k, m-k, + 1, c[k*ldc:], ldc, v[k:], ldv, + 1, work, ldwork) + } + // W *= T or Tᵀ. + bi.Dtrmm(blas.Right, blas.Upper, transt, blas.NonUnit, n, k, + 1, t, ldt, + work, ldwork) + // C -= Vᵀ * Wᵀ. + if m > k { + bi.Dgemm(blas.Trans, blas.Trans, m-k, n, k, + -1, v[k:], ldv, work, ldwork, + 1, c[k*ldc:], ldc) + } + // W *= V1. + bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, n, k, + 1, v, ldv, + work, ldwork) + // C1 -= Wᵀ. + // TODO(btracey): This should use blas.Axpy. + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[j*ldc+i] -= work[i*ldwork+j] + } + } + return + } + // Form C * H or C * Hᵀ where C = (C1 C2). + // W = C * Vᵀ. + + // W = C1. + for j := 0; j < k; j++ { + bi.Dcopy(m, c[j:], ldc, work[j:], ldwork) + } + // W *= V1ᵀ. + bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, m, k, + 1, v, ldv, + work, ldwork) + if n > k { + bi.Dgemm(blas.NoTrans, blas.Trans, m, k, n-k, + 1, c[k:], ldc, v[k:], ldv, + 1, work, ldwork) + } + // W *= T or Tᵀ. + bi.Dtrmm(blas.Right, blas.Upper, trans, blas.NonUnit, m, k, + 1, t, ldt, + work, ldwork) + // C -= W * V. + if n > k { + bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n-k, k, + -1, work, ldwork, v[k:], ldv, + 1, c[k:], ldc) + } + // W *= V1. + bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, m, k, + 1, v, ldv, + work, ldwork) + // C1 -= W. + // TODO(btracey): This should use blas.Axpy. + for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i*ldc+j] -= work[i*ldwork+j] + } + } + return + } + // V = (V1 V2) where V2 is the last k columns and is lower unit triangular. + if side == blas.Left { + // Form H * C or Hᵀ C where C = (C1 ; C2). + // W = Cᵀ * Vᵀ. + + // W = C2ᵀ. + for j := 0; j < k; j++ { + bi.Dcopy(n, c[(m-k+j)*ldc:], 1, work[j:], ldwork) + } + // W *= V2ᵀ. + bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, n, k, + 1, v[m-k:], ldv, + work, ldwork) + if m > k { + bi.Dgemm(blas.Trans, blas.Trans, n, k, m-k, + 1, c, ldc, v, ldv, + 1, work, ldwork) + } + // W *= T or Tᵀ. + bi.Dtrmm(blas.Right, blas.Lower, transt, blas.NonUnit, n, k, + 1, t, ldt, + work, ldwork) + // C -= Vᵀ * Wᵀ. + if m > k { + bi.Dgemm(blas.Trans, blas.Trans, m-k, n, k, + -1, v, ldv, work, ldwork, + 1, c, ldc) + } + // W *= V2. + bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, n, k, + 1, v[m-k:], ldv, + work, ldwork) + // C2 -= Wᵀ. + // TODO(btracey): This should use blas.Axpy. + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[(m-k+j)*ldc+i] -= work[i*ldwork+j] + } + } + return + } + // Form C * H or C * Hᵀ where C = (C1 C2). + // W = C * Vᵀ. + // W = C2. + for j := 0; j < k; j++ { + bi.Dcopy(m, c[n-k+j:], ldc, work[j:], ldwork) + } + // W *= V2ᵀ. + bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, m, k, + 1, v[n-k:], ldv, + work, ldwork) + if n > k { + bi.Dgemm(blas.NoTrans, blas.Trans, m, k, n-k, + 1, c, ldc, v, ldv, + 1, work, ldwork) + } + // W *= T or Tᵀ. + bi.Dtrmm(blas.Right, blas.Lower, trans, blas.NonUnit, m, k, + 1, t, ldt, + work, ldwork) + // C -= W * V. 
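+ // The product W * V splits into a general part against V1
+ // (below) and a unit triangular part against V2.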
+ if n > k { + bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n-k, k, + -1, work, ldwork, v, ldv, + 1, c, ldc) + } + // W *= V2. + bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, m, k, + 1, v[n-k:], ldv, + work, ldwork) + // C1 -= W. + // TODO(btracey): This should use blas.Axpy. + for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i*ldc+n-k+j] -= work[i*ldwork+j] + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfg.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfg.go new file mode 100644 index 0000000000..74ad111d41 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfg.go @@ -0,0 +1,75 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlarfg generates an elementary reflector for a Householder matrix. It creates +// a real elementary reflector of order n such that +// +// H * (alpha) = (beta) +// ( x) ( 0) +// Hᵀ * H = I +// +// H is represented in the form +// +// H = 1 - tau * (1; v) * (1 vᵀ) +// +// where tau is a real scalar. +// +// On entry, x contains the vector x, on exit it contains v. +// +// Dlarfg is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlarfg(n int, alpha float64, x []float64, incX int) (beta, tau float64) { + switch { + case n < 0: + panic(nLT0) + case incX <= 0: + panic(badIncX) + } + + if n <= 1 { + return alpha, 0 + } + + if len(x) < 1+(n-2)*abs(incX) { + panic(shortX) + } + + bi := blas64.Implementation() + + xnorm := bi.Dnrm2(n-1, x, incX) + if xnorm == 0 { + return alpha, 0 + } + beta = -math.Copysign(impl.Dlapy2(alpha, xnorm), alpha) + safmin := dlamchS / dlamchE + knt := 0 + if math.Abs(beta) < safmin { + // xnorm and beta may be inaccurate, scale x and recompute. + rsafmn := 1 / safmin + for { + knt++ + bi.Dscal(n-1, rsafmn, x, incX) + beta *= rsafmn + alpha *= rsafmn + if math.Abs(beta) >= safmin { + break + } + } + xnorm = bi.Dnrm2(n-1, x, incX) + beta = -math.Copysign(impl.Dlapy2(alpha, xnorm), alpha) + } + tau = (beta - alpha) / beta + bi.Dscal(n-1, 1/(alpha-beta), x, incX) + for j := 0; j < knt; j++ { + beta *= safmin + } + return beta, tau +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlarft.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarft.go new file mode 100644 index 0000000000..921a5a3d21 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarft.go @@ -0,0 +1,169 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dlarft forms the triangular factor T of a block reflector H, storing the answer +// in t. +// +// H = I - V * T * Vᵀ if store == lapack.ColumnWise +// H = I - Vᵀ * T * V if store == lapack.RowWise +// +// H is defined by a product of the elementary reflectors where +// +// H = H_0 * H_1 * ... * H_{k-1} if direct == lapack.Forward +// H = H_{k-1} * ... * H_1 * H_0 if direct == lapack.Backward +// +// t is a k×k triangular matrix. t is upper triangular if direct = lapack.Forward +// and lower triangular otherwise. This function will panic if t is not of +// sufficient size. +// +// store describes the storage of the elementary reflectors in v. See +// Dlarfb for a description of layout. 
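+//
+// The triangular factor lets a product of k reflectors be applied with
+// matrix-matrix products (see Dlarfb) rather than k separate rank-1 updates.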
+// +// tau contains the scalar factors of the elementary reflectors H_i. +// +// Dlarft is an internal routine. It is exported for testing purposes. +func (Implementation) Dlarft(direct lapack.Direct, store lapack.StoreV, n, k int, v []float64, ldv int, tau []float64, t []float64, ldt int) { + mv, nv := n, k + if store == lapack.RowWise { + mv, nv = k, n + } + switch { + case direct != lapack.Forward && direct != lapack.Backward: + panic(badDirect) + case store != lapack.RowWise && store != lapack.ColumnWise: + panic(badStoreV) + case n < 0: + panic(nLT0) + case k < 1: + panic(kLT1) + case ldv < max(1, nv): + panic(badLdV) + case len(tau) < k: + panic(shortTau) + case ldt < max(1, k): + panic(shortT) + } + + if n == 0 { + return + } + + switch { + case len(v) < (mv-1)*ldv+nv: + panic(shortV) + case len(t) < (k-1)*ldt+k: + panic(shortT) + } + + bi := blas64.Implementation() + + // TODO(btracey): There are a number of minor obvious loop optimizations here. + // TODO(btracey): It may be possible to rearrange some of the code so that + // index of 1 is more common in the Dgemv. + if direct == lapack.Forward { + prevlastv := n - 1 + for i := 0; i < k; i++ { + prevlastv = max(i, prevlastv) + if tau[i] == 0 { + for j := 0; j <= i; j++ { + t[j*ldt+i] = 0 + } + continue + } + var lastv int + if store == lapack.ColumnWise { + // skip trailing zeros + for lastv = n - 1; lastv >= i+1; lastv-- { + if v[lastv*ldv+i] != 0 { + break + } + } + for j := 0; j < i; j++ { + t[j*ldt+i] = -tau[i] * v[i*ldv+j] + } + j := min(lastv, prevlastv) + bi.Dgemv(blas.Trans, j-i, i, + -tau[i], v[(i+1)*ldv:], ldv, v[(i+1)*ldv+i:], ldv, + 1, t[i:], ldt) + } else { + for lastv = n - 1; lastv >= i+1; lastv-- { + if v[i*ldv+lastv] != 0 { + break + } + } + for j := 0; j < i; j++ { + t[j*ldt+i] = -tau[i] * v[j*ldv+i] + } + j := min(lastv, prevlastv) + bi.Dgemv(blas.NoTrans, i, j-i, + -tau[i], v[i+1:], ldv, v[i*ldv+i+1:], 1, + 1, t[i:], ldt) + } + bi.Dtrmv(blas.Upper, blas.NoTrans, blas.NonUnit, i, t, ldt, t[i:], ldt) + t[i*ldt+i] = tau[i] + if i > 1 { + prevlastv = max(prevlastv, lastv) + } else { + prevlastv = lastv + } + } + return + } + prevlastv := 0 + for i := k - 1; i >= 0; i-- { + if tau[i] == 0 { + for j := i; j < k; j++ { + t[j*ldt+i] = 0 + } + continue + } + var lastv int + if i < k-1 { + if store == lapack.ColumnWise { + for lastv = 0; lastv < i; lastv++ { + if v[lastv*ldv+i] != 0 { + break + } + } + for j := i + 1; j < k; j++ { + t[j*ldt+i] = -tau[i] * v[(n-k+i)*ldv+j] + } + j := max(lastv, prevlastv) + bi.Dgemv(blas.Trans, n-k+i-j, k-i-1, + -tau[i], v[j*ldv+i+1:], ldv, v[j*ldv+i:], ldv, + 1, t[(i+1)*ldt+i:], ldt) + } else { + for lastv = 0; lastv < i; lastv++ { + if v[i*ldv+lastv] != 0 { + break + } + } + for j := i + 1; j < k; j++ { + t[j*ldt+i] = -tau[i] * v[j*ldv+n-k+i] + } + j := max(lastv, prevlastv) + bi.Dgemv(blas.NoTrans, k-i-1, n-k+i-j, + -tau[i], v[(i+1)*ldv+j:], ldv, v[i*ldv+j:], 1, + 1, t[(i+1)*ldt+i:], ldt) + } + bi.Dtrmv(blas.Lower, blas.NoTrans, blas.NonUnit, k-i-1, + t[(i+1)*ldt+i+1:], ldt, + t[(i+1)*ldt+i:], ldt) + if i > 0 { + prevlastv = min(prevlastv, lastv) + } else { + prevlastv = lastv + } + } + t[i*ldt+i] = tau[i] + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfx.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfx.go new file mode 100644 index 0000000000..4e40dad188 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfx.go @@ -0,0 +1,552 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dlarfx applies an elementary reflector H to a real m×n matrix C, from either +// the left or the right, with loop unrolling when the reflector has order less +// than 11. +// +// H is represented in the form +// +// H = I - tau * v * vᵀ, +// +// where tau is a real scalar and v is a real vector. If tau = 0, then H is +// taken to be the identity matrix. +// +// v must have length equal to m if side == blas.Left, and equal to n if side == +// blas.Right, otherwise Dlarfx will panic. +// +// c and ldc represent the m×n matrix C. On return, C is overwritten by the +// matrix H * C if side == blas.Left, or C * H if side == blas.Right. +// +// work must have length at least n if side == blas.Left, and at least m if side +// == blas.Right, otherwise Dlarfx will panic. work is not referenced if H has +// order < 11. +// +// Dlarfx is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlarfx(side blas.Side, m, n int, v []float64, tau float64, c []float64, ldc int, work []float64) { + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + nh := m + lwork := n + if side == blas.Right { + nh = n + lwork = m + } + switch { + case len(v) < nh: + panic(shortV) + case len(c) < (m-1)*ldc+n: + panic(shortC) + case nh > 10 && len(work) < lwork: + panic(shortWork) + } + + if tau == 0 { + return + } + + if side == blas.Left { + // Form H * C, where H has order m. + switch m { + default: // Code for general m. + impl.Dlarf(side, m, n, v, 1, tau, c, ldc, work) + return + + case 0: // No-op for zero size matrix. + return + + case 1: // Special code for 1×1 Householder matrix. + t0 := 1 - tau*v[0]*v[0] + for j := 0; j < n; j++ { + c[j] *= t0 + } + return + + case 2: // Special code for 2×2 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + for j := 0; j < n; j++ { + sum := v0*c[j] + v1*c[ldc+j] + c[j] -= sum * t0 + c[ldc+j] -= sum * t1 + } + return + + case 3: // Special code for 3×3 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + for j := 0; j < n; j++ { + sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + c[j] -= sum * t0 + c[ldc+j] -= sum * t1 + c[2*ldc+j] -= sum * t2 + } + return + + case 4: // Special code for 4×4 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + for j := 0; j < n; j++ { + sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + c[j] -= sum * t0 + c[ldc+j] -= sum * t1 + c[2*ldc+j] -= sum * t2 + c[3*ldc+j] -= sum * t3 + } + return + + case 5: // Special code for 5×5 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + for j := 0; j < n; j++ { + sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j] + c[j] -= sum * t0 + c[ldc+j] -= sum * t1 + c[2*ldc+j] -= sum * t2 + c[3*ldc+j] -= sum * t3 + c[4*ldc+j] -= sum * t4 + } + return + + case 6: // Special code for 6×6 Householder matrix. 
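+		// Each unrolled case below follows the same pattern as cases 2-5
+		// above: for every column j, sum = vᵀ C[:,j] is accumulated with
+		// the elements of v and tau*v held in scalars, and then
+		// C[:,j] -= sum * tau * v is applied term by term.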
+ v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + v5 := v[5] + t5 := tau * v5 + for j := 0; j < n; j++ { + sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j] + + v5*c[5*ldc+j] + c[j] -= sum * t0 + c[ldc+j] -= sum * t1 + c[2*ldc+j] -= sum * t2 + c[3*ldc+j] -= sum * t3 + c[4*ldc+j] -= sum * t4 + c[5*ldc+j] -= sum * t5 + } + return + + case 7: // Special code for 7×7 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + v5 := v[5] + t5 := tau * v5 + v6 := v[6] + t6 := tau * v6 + for j := 0; j < n; j++ { + sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j] + + v5*c[5*ldc+j] + v6*c[6*ldc+j] + c[j] -= sum * t0 + c[ldc+j] -= sum * t1 + c[2*ldc+j] -= sum * t2 + c[3*ldc+j] -= sum * t3 + c[4*ldc+j] -= sum * t4 + c[5*ldc+j] -= sum * t5 + c[6*ldc+j] -= sum * t6 + } + return + + case 8: // Special code for 8×8 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + v5 := v[5] + t5 := tau * v5 + v6 := v[6] + t6 := tau * v6 + v7 := v[7] + t7 := tau * v7 + for j := 0; j < n; j++ { + sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j] + + v5*c[5*ldc+j] + v6*c[6*ldc+j] + v7*c[7*ldc+j] + c[j] -= sum * t0 + c[ldc+j] -= sum * t1 + c[2*ldc+j] -= sum * t2 + c[3*ldc+j] -= sum * t3 + c[4*ldc+j] -= sum * t4 + c[5*ldc+j] -= sum * t5 + c[6*ldc+j] -= sum * t6 + c[7*ldc+j] -= sum * t7 + } + return + + case 9: // Special code for 9×9 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + v5 := v[5] + t5 := tau * v5 + v6 := v[6] + t6 := tau * v6 + v7 := v[7] + t7 := tau * v7 + v8 := v[8] + t8 := tau * v8 + for j := 0; j < n; j++ { + sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j] + + v5*c[5*ldc+j] + v6*c[6*ldc+j] + v7*c[7*ldc+j] + v8*c[8*ldc+j] + c[j] -= sum * t0 + c[ldc+j] -= sum * t1 + c[2*ldc+j] -= sum * t2 + c[3*ldc+j] -= sum * t3 + c[4*ldc+j] -= sum * t4 + c[5*ldc+j] -= sum * t5 + c[6*ldc+j] -= sum * t6 + c[7*ldc+j] -= sum * t7 + c[8*ldc+j] -= sum * t8 + } + return + + case 10: // Special code for 10×10 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + v5 := v[5] + t5 := tau * v5 + v6 := v[6] + t6 := tau * v6 + v7 := v[7] + t7 := tau * v7 + v8 := v[8] + t8 := tau * v8 + v9 := v[9] + t9 := tau * v9 + for j := 0; j < n; j++ { + sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j] + + v5*c[5*ldc+j] + v6*c[6*ldc+j] + v7*c[7*ldc+j] + v8*c[8*ldc+j] + v9*c[9*ldc+j] + c[j] -= sum * t0 + c[ldc+j] -= sum * t1 + c[2*ldc+j] -= sum * t2 + c[3*ldc+j] -= sum * t3 + c[4*ldc+j] -= sum * t4 + c[5*ldc+j] -= sum * t5 + c[6*ldc+j] -= sum * t6 + c[7*ldc+j] -= sum * t7 + c[8*ldc+j] -= sum * t8 + c[9*ldc+j] -= sum * t9 + } + return + } + } + + // Form C * H, where H has order n. + switch n { + default: // Code for general n. + impl.Dlarf(side, m, n, v, 1, tau, c, ldc, work) + return + + case 0: // No-op for zero size matrix. + return + + case 1: // Special code for 1×1 Householder matrix. 
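+		// For a 1×1 Householder matrix, H is the scalar 1 - tau*v0*v0, so
+		// forming C * H reduces to scaling the single column of C.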
+ t0 := 1 - tau*v[0]*v[0] + for j := 0; j < m; j++ { + c[j*ldc] *= t0 + } + return + + case 2: // Special code for 2×2 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + for j := 0; j < m; j++ { + cs := c[j*ldc:] + sum := v0*cs[0] + v1*cs[1] + cs[0] -= sum * t0 + cs[1] -= sum * t1 + } + return + + case 3: // Special code for 3×3 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + for j := 0; j < m; j++ { + cs := c[j*ldc:] + sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + cs[0] -= sum * t0 + cs[1] -= sum * t1 + cs[2] -= sum * t2 + } + return + + case 4: // Special code for 4×4 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + for j := 0; j < m; j++ { + cs := c[j*ldc:] + sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + cs[0] -= sum * t0 + cs[1] -= sum * t1 + cs[2] -= sum * t2 + cs[3] -= sum * t3 + } + return + + case 5: // Special code for 5×5 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + for j := 0; j < m; j++ { + cs := c[j*ldc:] + sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4] + cs[0] -= sum * t0 + cs[1] -= sum * t1 + cs[2] -= sum * t2 + cs[3] -= sum * t3 + cs[4] -= sum * t4 + } + return + + case 6: // Special code for 6×6 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + v5 := v[5] + t5 := tau * v5 + for j := 0; j < m; j++ { + cs := c[j*ldc:] + sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4] + v5*cs[5] + cs[0] -= sum * t0 + cs[1] -= sum * t1 + cs[2] -= sum * t2 + cs[3] -= sum * t3 + cs[4] -= sum * t4 + cs[5] -= sum * t5 + } + return + + case 7: // Special code for 7×7 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + v5 := v[5] + t5 := tau * v5 + v6 := v[6] + t6 := tau * v6 + for j := 0; j < m; j++ { + cs := c[j*ldc:] + sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4] + + v5*cs[5] + v6*cs[6] + cs[0] -= sum * t0 + cs[1] -= sum * t1 + cs[2] -= sum * t2 + cs[3] -= sum * t3 + cs[4] -= sum * t4 + cs[5] -= sum * t5 + cs[6] -= sum * t6 + } + return + + case 8: // Special code for 8×8 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + v5 := v[5] + t5 := tau * v5 + v6 := v[6] + t6 := tau * v6 + v7 := v[7] + t7 := tau * v7 + for j := 0; j < m; j++ { + cs := c[j*ldc:] + sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4] + + v5*cs[5] + v6*cs[6] + v7*cs[7] + cs[0] -= sum * t0 + cs[1] -= sum * t1 + cs[2] -= sum * t2 + cs[3] -= sum * t3 + cs[4] -= sum * t4 + cs[5] -= sum * t5 + cs[6] -= sum * t6 + cs[7] -= sum * t7 + } + return + + case 9: // Special code for 9×9 Householder matrix. 
+ v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + v5 := v[5] + t5 := tau * v5 + v6 := v[6] + t6 := tau * v6 + v7 := v[7] + t7 := tau * v7 + v8 := v[8] + t8 := tau * v8 + for j := 0; j < m; j++ { + cs := c[j*ldc:] + sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4] + + v5*cs[5] + v6*cs[6] + v7*cs[7] + v8*cs[8] + cs[0] -= sum * t0 + cs[1] -= sum * t1 + cs[2] -= sum * t2 + cs[3] -= sum * t3 + cs[4] -= sum * t4 + cs[5] -= sum * t5 + cs[6] -= sum * t6 + cs[7] -= sum * t7 + cs[8] -= sum * t8 + } + return + + case 10: // Special code for 10×10 Householder matrix. + v0 := v[0] + t0 := tau * v0 + v1 := v[1] + t1 := tau * v1 + v2 := v[2] + t2 := tau * v2 + v3 := v[3] + t3 := tau * v3 + v4 := v[4] + t4 := tau * v4 + v5 := v[5] + t5 := tau * v5 + v6 := v[6] + t6 := tau * v6 + v7 := v[7] + t7 := tau * v7 + v8 := v[8] + t8 := tau * v8 + v9 := v[9] + t9 := tau * v9 + for j := 0; j < m; j++ { + cs := c[j*ldc:] + sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4] + + v5*cs[5] + v6*cs[6] + v7*cs[7] + v8*cs[8] + v9*cs[9] + cs[0] -= sum * t0 + cs[1] -= sum * t1 + cs[2] -= sum * t2 + cs[3] -= sum * t3 + cs[4] -= sum * t4 + cs[5] -= sum * t5 + cs[6] -= sum * t6 + cs[7] -= sum * t7 + cs[8] -= sum * t8 + cs[9] -= sum * t9 + } + return + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlartg.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlartg.go new file mode 100644 index 0000000000..93416c6f5f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlartg.go @@ -0,0 +1,73 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlartg generates a plane rotation so that +// +// [ cs sn] * [f] = [r] +// [-sn cs] [g] = [0] +// +// where cs*cs + sn*sn = 1. +// +// This is a more accurate version of BLAS Drotg that uses scaling to avoid +// overflow or underflow, with the other differences that +// - cs >= 0 +// - if g = 0, then cs = 1 and sn = 0 +// - if f = 0 and g != 0, then cs = 0 and sn = sign(1,g) +// +// Dlartg is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlartg(f, g float64) (cs, sn, r float64) { + // Implementation based on Supplemental Material to: + // + // Edward Anderson + // Algorithm 978: Safe Scaling in the Level 1 BLAS + // ACM Trans. Math. Softw. 44, 1, Article 12 (2017) + // DOI: https://doi.org/10.1145/3061665 + // + // For further details see: + // + // W. Pereira, A. Lotfi, J. 
Langou + // Numerical analysis of Givens rotation + // DOI: https://doi.org/10.48550/arXiv.2211.04010 + + if g == 0 { + return 1, 0, f + } + + g1 := math.Abs(g) + + if f == 0 { + return 0, math.Copysign(1, g), g1 + } + + const safmin = dlamchS + const safmax = 1 / safmin + rtmin := math.Sqrt(safmin) + rtmax := math.Sqrt(safmax / 2) + + f1 := math.Abs(f) + + if rtmin < f1 && f1 < rtmax && rtmin < g1 && g1 < rtmax { + d := math.Sqrt(f*f + g*g) + cs = f1 / d + r = math.Copysign(d, f) + sn = g / r + + return cs, sn, r + } + + u := math.Min(math.Max(safmin, math.Max(f1, g1)), safmax) + fs := f / u + gs := g / u + d := math.Sqrt(fs*fs + gs*gs) + cs = math.Abs(fs) / d + r = math.Copysign(d, f) + sn = gs / r + r *= u + + return cs, sn, r +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlas2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlas2.go new file mode 100644 index 0000000000..a819fa3536 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlas2.go @@ -0,0 +1,45 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlas2 computes the singular values of the 2×2 matrix defined by +// +// [F G] +// [0 H] +// +// The smaller and larger singular values are returned in that order. +// +// Dlas2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlas2(f, g, h float64) (ssmin, ssmax float64) { + fa := math.Abs(f) + ga := math.Abs(g) + ha := math.Abs(h) + fhmin := math.Min(fa, ha) + fhmax := math.Max(fa, ha) + if fhmin == 0 { + if fhmax == 0 { + return 0, ga + } + v := math.Min(fhmax, ga) / math.Max(fhmax, ga) + return 0, math.Max(fhmax, ga) * math.Sqrt(1+v*v) + } + if ga < fhmax { + as := 1 + fhmin/fhmax + at := (fhmax - fhmin) / fhmax + au := (ga / fhmax) * (ga / fhmax) + c := 2 / (math.Sqrt(as*as+au) + math.Sqrt(at*at+au)) + return fhmin * c, fhmax / c + } + au := fhmax / ga + if au == 0 { + return fhmin * fhmax / ga, ga + } + as := 1 + fhmin/fhmax + at := (fhmax - fhmin) / fhmax + c := 1 / (math.Sqrt(1+(as*au)*(as*au)) + math.Sqrt(1+(at*au)*(at*au))) + return 2 * (fhmin * c) * au, ga / (c + c) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlascl.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlascl.go new file mode 100644 index 0000000000..61c4eb79cb --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlascl.go @@ -0,0 +1,111 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/lapack" +) + +// Dlascl multiplies an m×n matrix by the scalar cto/cfrom. +// +// cfrom must not be zero, and cto and cfrom must not be NaN, otherwise Dlascl +// will panic. +// +// Dlascl is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlascl(kind lapack.MatrixType, kl, ku int, cfrom, cto float64, m, n int, a []float64, lda int) { + switch kind { + default: + panic(badMatrixType) + case 'H', 'B', 'Q', 'Z': // See dlascl.f. 
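+		// These are the Hessenberg and band matrix types accepted by the
+		// reference LAPACK dlascl; this port implements only the General,
+		// UpperTri and LowerTri kinds handled below.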
+ panic("not implemented") + case lapack.General, lapack.UpperTri, lapack.LowerTri: + if lda < max(1, n) { + panic(badLdA) + } + } + switch { + case cfrom == 0: + panic(zeroCFrom) + case math.IsNaN(cfrom): + panic(nanCFrom) + case math.IsNaN(cto): + panic(nanCTo) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + } + + if n == 0 || m == 0 { + return + } + + switch kind { + case lapack.General, lapack.UpperTri, lapack.LowerTri: + if len(a) < (m-1)*lda+n { + panic(shortA) + } + } + + smlnum := dlamchS + bignum := 1 / smlnum + cfromc := cfrom + ctoc := cto + cfrom1 := cfromc * smlnum + for { + var done bool + var mul, ctol float64 + if cfrom1 == cfromc { + // cfromc is inf. + mul = ctoc / cfromc + done = true + ctol = ctoc + } else { + ctol = ctoc / bignum + if ctol == ctoc { + // ctoc is either 0 or inf. + mul = ctoc + done = true + cfromc = 1 + } else if math.Abs(cfrom1) > math.Abs(ctoc) && ctoc != 0 { + mul = smlnum + done = false + cfromc = cfrom1 + } else if math.Abs(ctol) > math.Abs(cfromc) { + mul = bignum + done = false + ctoc = ctol + } else { + mul = ctoc / cfromc + done = true + } + } + switch kind { + case lapack.General: + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + a[i*lda+j] = a[i*lda+j] * mul + } + } + case lapack.UpperTri: + for i := 0; i < m; i++ { + for j := i; j < n; j++ { + a[i*lda+j] = a[i*lda+j] * mul + } + } + case lapack.LowerTri: + for i := 0; i < m; i++ { + for j := 0; j <= min(i, n-1); j++ { + a[i*lda+j] = a[i*lda+j] * mul + } + } + } + if done { + break + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaset.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaset.go new file mode 100644 index 0000000000..b8b6b0f4db --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaset.go @@ -0,0 +1,58 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dlaset sets the off-diagonal elements of A to alpha, and the diagonal +// elements to beta. If uplo == blas.Upper, only the elements in the upper +// triangular part are set. If uplo == blas.Lower, only the elements in the +// lower triangular part are set. If uplo is otherwise, all of the elements of A +// are set. +// +// Dlaset is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlaset(uplo blas.Uplo, m, n int, alpha, beta float64, a []float64, lda int) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + minmn := min(m, n) + if minmn == 0 { + return + } + + if len(a) < (m-1)*lda+n { + panic(shortA) + } + + switch uplo { + case blas.Upper: + for i := 0; i < m; i++ { + for j := i + 1; j < n; j++ { + a[i*lda+j] = alpha + } + } + case blas.Lower: + for i := 0; i < m; i++ { + for j := 0; j < min(i, n); j++ { + a[i*lda+j] = alpha + } + } + default: + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + a[i*lda+j] = alpha + } + } + } + for i := 0; i < minmn; i++ { + a[i*lda+i] = beta + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq1.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq1.go new file mode 100644 index 0000000000..1f1d1dc42e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq1.go @@ -0,0 +1,100 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlasq1 computes the singular values of an n×n bidiagonal matrix with diagonal
+// d and off-diagonal e. On exit, d contains the singular values in decreasing
+// order, and e is overwritten. d must have length at least n, e must have
+// length at least n-1, and the input work must have length at least 4*n. Dlasq1
+// will panic if these conditions are not met.
+//
+// Dlasq1 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasq1(n int, d, e, work []float64) (info int) {
+	if n < 0 {
+		panic(nLT0)
+	}
+
+	if n == 0 {
+		return info
+	}
+
+	switch {
+	case len(d) < n:
+		panic(shortD)
+	case len(e) < n-1:
+		panic(shortE)
+	case len(work) < 4*n:
+		panic(shortWork)
+	}
+
+	if n == 1 {
+		d[0] = math.Abs(d[0])
+		return info
+	}
+
+	if n == 2 {
+		d[1], d[0] = impl.Dlas2(d[0], e[0], d[1])
+		return info
+	}
+
+	// Estimate the largest singular value.
+	var sigmx float64
+	for i := 0; i < n-1; i++ {
+		d[i] = math.Abs(d[i])
+		sigmx = math.Max(sigmx, math.Abs(e[i]))
+	}
+	d[n-1] = math.Abs(d[n-1])
+	// Early return if sigmx is zero (matrix is already diagonal).
+	if sigmx == 0 {
+		impl.Dlasrt(lapack.SortDecreasing, n, d)
+		return info
+	}
+
+	for i := 0; i < n; i++ {
+		sigmx = math.Max(sigmx, d[i])
+	}
+
+	// Copy D and E into WORK (in the Z format) and scale (squaring the
+	// input data makes scaling by a power of the radix pointless).
+
+	eps := dlamchP
+	safmin := dlamchS
+	scale := math.Sqrt(eps / safmin)
+	bi := blas64.Implementation()
+	bi.Dcopy(n, d, 1, work, 2)
+	bi.Dcopy(n-1, e, 1, work[1:], 2)
+	impl.Dlascl(lapack.General, 0, 0, sigmx, scale, 2*n-1, 1, work, 1)
+
+	// Compute the q's and e's.
+	for i := 0; i < 2*n-1; i++ {
+		work[i] *= work[i]
+	}
+	work[2*n-1] = 0
+
+	info = impl.Dlasq2(n, work)
+	if info == 0 {
+		for i := 0; i < n; i++ {
+			d[i] = math.Sqrt(work[i])
+		}
+		impl.Dlascl(lapack.General, 0, 0, scale, sigmx, n, 1, d, 1)
+	} else if info == 2 {
+		// Maximum number of iterations exceeded. Move data from work
+		// into D and E so the calling subroutine can try to finish.
+		for i := 0; i < n; i++ {
+			d[i] = math.Sqrt(work[2*i])
+			e[i] = math.Sqrt(work[2*i+1])
+		}
+		impl.Dlascl(lapack.General, 0, 0, scale, sigmx, n, 1, d, 1)
+		impl.Dlascl(lapack.General, 0, 0, scale, sigmx, n, 1, e, 1)
+	}
+	return info
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq2.go
new file mode 100644
index 0000000000..e3870b1d96
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq2.go
@@ -0,0 +1,370 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlasq2 computes all the eigenvalues of the symmetric positive
+// definite tridiagonal matrix associated with the qd array Z. Eigenvalues
+// are computed to high relative accuracy avoiding denormalization, underflow
+// and overflow.
+//
+// To see the relation of Z to the tridiagonal matrix, let L be a
+// unit lower bidiagonal matrix with sub-diagonals Z(2,4,6,...) and
+// let U be an upper bidiagonal matrix with 1's above and diagonal
+// Z(1,3,5,...). The tridiagonal is L*U or, if you prefer, the
+// symmetric tridiagonal to which it is similar.
+//
+// info returns a status error. 
The return codes mean as follows: +// +// 0: The algorithm completed successfully. +// 1: A split was marked by a positive value in e. +// 2: Current block of Z not diagonalized after 100*n iterations (in inner +// while loop). On exit Z holds a qd array with the same eigenvalues as +// the given Z. +// 3: Termination criterion of outer while loop not met (program created more +// than N unreduced blocks). +// +// z must have length at least 4*n, and must not contain any negative elements. +// Dlasq2 will panic otherwise. +// +// Dlasq2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlasq2(n int, z []float64) (info int) { + if n < 0 { + panic(nLT0) + } + + if n == 0 { + return info + } + + if len(z) < 4*n { + panic(shortZ) + } + + if n == 1 { + if z[0] < 0 { + panic(negZ) + } + return info + } + + const cbias = 1.5 + + eps := dlamchP + safmin := dlamchS + tol := eps * 100 + tol2 := tol * tol + if n == 2 { + if z[1] < 0 || z[2] < 0 { + panic(negZ) + } else if z[2] > z[0] { + z[0], z[2] = z[2], z[0] + } + z[4] = z[0] + z[1] + z[2] + if z[1] > z[2]*tol2 { + t := 0.5 * (z[0] - z[2] + z[1]) + s := z[2] * (z[1] / t) + if s <= t { + s = z[2] * (z[1] / (t * (1 + math.Sqrt(1+s/t)))) + } else { + s = z[2] * (z[1] / (t + math.Sqrt(t)*math.Sqrt(t+s))) + } + t = z[0] + s + z[1] + z[2] *= z[0] / t + z[0] = t + } + z[1] = z[2] + z[5] = z[1] + z[0] + return info + } + // Check for negative data and compute sums of q's and e's. + z[2*n-1] = 0 + emin := z[1] + var d, e, qmax float64 + var i1, n1 int + for k := 0; k < 2*(n-1); k += 2 { + if z[k] < 0 || z[k+1] < 0 { + panic(negZ) + } + d += z[k] + e += z[k+1] + qmax = math.Max(qmax, z[k]) + emin = math.Min(emin, z[k+1]) + } + if z[2*(n-1)] < 0 { + panic(negZ) + } + d += z[2*(n-1)] + // Check for diagonality. + if e == 0 { + for k := 1; k < n; k++ { + z[k] = z[2*k] + } + impl.Dlasrt(lapack.SortDecreasing, n, z) + z[2*(n-1)] = d + return info + } + trace := d + e + // Check for zero data. + if trace == 0 { + z[2*(n-1)] = 0 + return info + } + // Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). + for k := 2 * n; k >= 2; k -= 2 { + z[2*k-1] = 0 + z[2*k-2] = z[k-1] + z[2*k-3] = 0 + z[2*k-4] = z[k-2] + } + i0 := 0 + n0 := n - 1 + + // Reverse the qd-array, if warranted. + // z[4*i0-3] --> z[4*(i0+1)-3-1] --> z[4*i0] + if cbias*z[4*i0] < z[4*n0] { + ipn4Out := 4 * (i0 + n0 + 2) + for i4loop := 4 * (i0 + 1); i4loop <= 2*(i0+n0+1); i4loop += 4 { + i4 := i4loop - 1 + ipn4 := ipn4Out - 1 + z[i4-3], z[ipn4-i4-4] = z[ipn4-i4-4], z[i4-3] + z[i4-1], z[ipn4-i4-6] = z[ipn4-i4-6], z[i4-1] + } + } + + // Initial split checking via dqd and Li's test. + pp := 0 + for k := 0; k < 2; k++ { + d = z[4*n0+pp] + for i4loop := 4*n0 + pp; i4loop >= 4*(i0+1)+pp; i4loop -= 4 { + i4 := i4loop - 1 + if z[i4-1] <= tol2*d { + z[i4-1] = math.Copysign(0, -1) + d = z[i4-3] + } else { + d = z[i4-3] * (d / (d + z[i4-1])) + } + } + // dqd maps Z to ZZ plus Li's test. 
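+		// Sketch of the pass below: with q's and e's interleaved in z, one
+		// unshifted dqd step computes q'_i = d_i + e_i,
+		// e'_i = e_i * (q_{i+1} / q'_i) and d_{i+1} = d_i * (q_{i+1} / q'_i),
+		// while Li's test flushes negligible e_i values to (negatively
+		// signed) zero.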
+ emin = z[4*(i0+1)+pp] + d = z[4*i0+pp] + for i4loop := 4*(i0+1) + pp; i4loop <= 4*n0+pp; i4loop += 4 { + i4 := i4loop - 1 + z[i4-2*pp-2] = d + z[i4-1] + if z[i4-1] <= tol2*d { + z[i4-1] = math.Copysign(0, -1) + z[i4-2*pp-2] = d + z[i4-2*pp] = 0 + d = z[i4+1] + } else if safmin*z[i4+1] < z[i4-2*pp-2] && safmin*z[i4-2*pp-2] < z[i4+1] { + tmp := z[i4+1] / z[i4-2*pp-2] + z[i4-2*pp] = z[i4-1] * tmp + d *= tmp + } else { + z[i4-2*pp] = z[i4+1] * (z[i4-1] / z[i4-2*pp-2]) + d = z[i4+1] * (d / z[i4-2*pp-2]) + } + emin = math.Min(emin, z[i4-2*pp]) + } + z[4*(n0+1)-pp-3] = d + + // Now find qmax. + qmax = z[4*(i0+1)-pp-3] + for i4loop := 4*(i0+1) - pp + 2; i4loop <= 4*(n0+1)+pp-2; i4loop += 4 { + i4 := i4loop - 1 + qmax = math.Max(qmax, z[i4]) + } + // Prepare for the next iteration on K. + pp = 1 - pp + } + + // Initialise variables to pass to DLASQ3. + var ttype int + var dmin1, dmin2, dn, dn1, dn2, g, tau float64 + var tempq float64 + iter := 2 + var nFail int + nDiv := 2 * (n0 - i0) + var i4 int +outer: + for iwhila := 1; iwhila <= n+1; iwhila++ { + // Test for completion. + if n0 < 0 { + // Move q's to the front. + for k := 1; k < n; k++ { + z[k] = z[4*k] + } + // Sort and compute sum of eigenvalues. + impl.Dlasrt(lapack.SortDecreasing, n, z) + e = 0 + for k := n - 1; k >= 0; k-- { + e += z[k] + } + // Store trace, sum(eigenvalues) and information on performance. + z[2*n] = trace + z[2*n+1] = e + z[2*n+2] = float64(iter) + z[2*n+3] = float64(nDiv) / float64(n*n) + z[2*n+4] = 100 * float64(nFail) / float64(iter) + return info + } + + // While array unfinished do + // e[n0] holds the value of sigma when submatrix in i0:n0 + // splits from the rest of the array, but is negated. + var desig float64 + var sigma float64 + if n0 != n-1 { + sigma = -z[4*(n0+1)-2] + } + if sigma < 0 { + info = 1 + return info + } + // Find last unreduced submatrix's top index i0, find qmax and + // emin. Find Gershgorin-type bound if Q's much greater than E's. + var emax float64 + if n0 > i0 { + emin = math.Abs(z[4*(n0+1)-6]) + } else { + emin = 0 + } + qmin := z[4*(n0+1)-4] + qmax = qmin + zSmall := false + for i4loop := 4 * (n0 + 1); i4loop >= 8; i4loop -= 4 { + i4 = i4loop - 1 + if z[i4-5] <= 0 { + zSmall = true + break + } + if qmin >= 4*emax { + qmin = math.Min(qmin, z[i4-3]) + emax = math.Max(emax, z[i4-5]) + } + qmax = math.Max(qmax, z[i4-7]+z[i4-5]) + emin = math.Min(emin, z[i4-5]) + } + if !zSmall { + i4 = 3 + } + i0 = (i4+1)/4 - 1 + pp = 0 + if n0-i0 > 1 { + dee := z[4*i0] + deemin := dee + kmin := i0 + for i4loop := 4*(i0+1) + 1; i4loop <= 4*(n0+1)-3; i4loop += 4 { + i4 := i4loop - 1 + dee = z[i4] * (dee / (dee + z[i4-2])) + if dee <= deemin { + deemin = dee + kmin = (i4+4)/4 - 1 + } + } + if (kmin-i0)*2 < n0-kmin && deemin <= 0.5*z[4*n0] { + ipn4Out := 4 * (i0 + n0 + 2) + pp = 2 + for i4loop := 4 * (i0 + 1); i4loop <= 2*(i0+n0+1); i4loop += 4 { + i4 := i4loop - 1 + ipn4 := ipn4Out - 1 + z[i4-3], z[ipn4-i4-4] = z[ipn4-i4-4], z[i4-3] + z[i4-2], z[ipn4-i4-3] = z[ipn4-i4-3], z[i4-2] + z[i4-1], z[ipn4-i4-6] = z[ipn4-i4-6], z[i4-1] + z[i4], z[ipn4-i4-5] = z[ipn4-i4-5], z[i4] + } + } + } + // Put -(initial shift) into DMIN. + dmin := -math.Max(0, qmin-2*math.Sqrt(qmin)*math.Sqrt(emax)) + + // Now i0:n0 is unreduced. + // PP = 0 for ping, PP = 1 for pong. + // PP = 2 indicates that flipping was applied to the Z array and + // that the tests for deflation upon entry in Dlasq3 should + // not be performed. 
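+		// nbig caps the inner dqds loop at 100 iterations per eigenvalue of
+		// the current unreduced block; if the block is not diagonalized
+		// within this budget, control falls through to the info == 2
+		// recovery code below.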
+ nbig := 100 * (n0 - i0 + 1) + for iwhilb := 0; iwhilb < nbig; iwhilb++ { + if i0 > n0 { + continue outer + } + + // While submatrix unfinished take a good dqds step. + i0, n0, pp, dmin, sigma, desig, qmax, nFail, iter, nDiv, ttype, dmin1, dmin2, dn, dn1, dn2, g, tau = + impl.Dlasq3(i0, n0, z, pp, dmin, sigma, desig, qmax, nFail, iter, nDiv, ttype, dmin1, dmin2, dn, dn1, dn2, g, tau) + + pp = 1 - pp + // When emin is very small check for splits. + if pp == 0 && n0-i0 >= 3 { + if z[4*(n0+1)-1] <= tol2*qmax || z[4*(n0+1)-2] <= tol2*sigma { + splt := i0 - 1 + qmax = z[4*i0] + emin = z[4*(i0+1)-2] + oldemn := z[4*(i0+1)-1] + for i4loop := 4 * (i0 + 1); i4loop <= 4*(n0-2); i4loop += 4 { + i4 := i4loop - 1 + if z[i4] <= tol2*z[i4-3] || z[i4-1] <= tol2*sigma { + z[i4-1] = -sigma + splt = i4 / 4 + qmax = 0 + emin = z[i4+3] + oldemn = z[i4+4] + } else { + qmax = math.Max(qmax, z[i4+1]) + emin = math.Min(emin, z[i4-1]) + oldemn = math.Min(oldemn, z[i4]) + } + } + z[4*(n0+1)-2] = emin + z[4*(n0+1)-1] = oldemn + i0 = splt + 1 + } + } + } + // Maximum number of iterations exceeded, restore the shift + // sigma and place the new d's and e's in a qd array. + // This might need to be done for several blocks. + info = 2 + i1 = i0 + for { + tempq = z[4*i0] + z[4*i0] += sigma + for k := i0 + 1; k <= n0; k++ { + tempe := z[4*(k+1)-6] + z[4*(k+1)-6] *= tempq / z[4*(k+1)-8] + tempq = z[4*k] + z[4*k] += sigma + tempe - z[4*(k+1)-6] + } + // Prepare to do this on the previous block if there is one. + if i1 <= 0 { + break + } + n1 = i1 - 1 + for i1 >= 1 && z[4*(i1+1)-6] >= 0 { + i1 -= 1 + } + sigma = -z[4*(n1+1)-2] + } + for k := 0; k < n; k++ { + z[2*k] = z[4*k] + // Only the block 1..N0 is unfinished. The rest of the e's + // must be essentially zero, although sometimes other data + // has been stored in them. + if k < n0 { + z[2*(k+1)-1] = z[4*(k+1)-1] + } else { + z[2*(k+1)] = 0 + } + } + return info + } + info = 3 + return info +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq3.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq3.go new file mode 100644 index 0000000000..a05e94ef17 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq3.go @@ -0,0 +1,172 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlasq3 checks for deflation, computes a shift (tau) and calls dqds. +// In case of failure it changes shifts, and tries again until output +// is positive. +// +// Dlasq3 is an internal routine. It is exported for testing purposes. 
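+//
+// i0 and n0 are zero-based indices bounding the current unreduced block of
+// the qd array z. The remaining arguments thread the solver state between
+// successive calls from Dlasq2 -- the accumulated shift sigma, the counters
+// nFail, iter and nDiv, the shift type ttype, and the previous d values --
+// and all of them are returned updated.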
+func (impl Implementation) Dlasq3(i0, n0 int, z []float64, pp int, dmin, sigma, desig, qmax float64, nFail, iter, nDiv int, ttype int, dmin1, dmin2, dn, dn1, dn2, g, tau float64) ( + i0Out, n0Out, ppOut int, dminOut, sigmaOut, desigOut, qmaxOut float64, nFailOut, iterOut, nDivOut, ttypeOut int, dmin1Out, dmin2Out, dnOut, dn1Out, dn2Out, gOut, tauOut float64) { + switch { + case i0 < 0: + panic(i0LT0) + case n0 < 0: + panic(n0LT0) + case len(z) < 4*n0: + panic(shortZ) + case pp != 0 && pp != 1 && pp != 2: + panic(badPp) + } + + const cbias = 1.5 + + n0in := n0 + eps := dlamchP + tol := eps * 100 + tol2 := tol * tol + var nn int + var t float64 + for { + if n0 < i0 { + return i0, n0, pp, dmin, sigma, desig, qmax, nFail, iter, nDiv, ttype, dmin1, dmin2, dn, dn1, dn2, g, tau + } + if n0 == i0 { + z[4*(n0+1)-4] = z[4*(n0+1)+pp-4] + sigma + n0-- + continue + } + nn = 4*(n0+1) + pp - 1 + if n0 != i0+1 { + // Check whether e[n0-1] is negligible, 1 eigenvalue. + if z[nn-5] > tol2*(sigma+z[nn-3]) && z[nn-2*pp-4] > tol2*z[nn-7] { + // Check whether e[n0-2] is negligible, 2 eigenvalues. + if z[nn-9] > tol2*sigma && z[nn-2*pp-8] > tol2*z[nn-11] { + break + } + } else { + z[4*(n0+1)-4] = z[4*(n0+1)+pp-4] + sigma + n0-- + continue + } + } + if z[nn-3] > z[nn-7] { + z[nn-3], z[nn-7] = z[nn-7], z[nn-3] + } + t = 0.5 * (z[nn-7] - z[nn-3] + z[nn-5]) + if z[nn-5] > z[nn-3]*tol2 && t != 0 { + s := z[nn-3] * (z[nn-5] / t) + if s <= t { + s = z[nn-3] * (z[nn-5] / (t * (1 + math.Sqrt(1+s/t)))) + } else { + s = z[nn-3] * (z[nn-5] / (t + math.Sqrt(t)*math.Sqrt(t+s))) + } + t = z[nn-7] + (s + z[nn-5]) + z[nn-3] *= z[nn-7] / t + z[nn-7] = t + } + z[4*(n0+1)-8] = z[nn-7] + sigma + z[4*(n0+1)-4] = z[nn-3] + sigma + n0 -= 2 + } + if pp == 2 { + pp = 0 + } + + // Reverse the qd-array, if warranted. + if dmin <= 0 || n0 < n0in { + if cbias*z[4*(i0+1)+pp-4] < z[4*(n0+1)+pp-4] { + ipn4Out := 4 * (i0 + n0 + 2) + for j4loop := 4 * (i0 + 1); j4loop <= 2*((i0+1)+(n0+1)-1); j4loop += 4 { + ipn4 := ipn4Out - 1 + j4 := j4loop - 1 + + z[j4-3], z[ipn4-j4-4] = z[ipn4-j4-4], z[j4-3] + z[j4-2], z[ipn4-j4-3] = z[ipn4-j4-3], z[j4-2] + z[j4-1], z[ipn4-j4-6] = z[ipn4-j4-6], z[j4-1] + z[j4], z[ipn4-j4-5] = z[ipn4-j4-5], z[j4] + } + if n0-i0 <= 4 { + z[4*(n0+1)+pp-2] = z[4*(i0+1)+pp-2] + z[4*(n0+1)-pp-1] = z[4*(i0+1)-pp-1] + } + dmin2 = math.Min(dmin2, z[4*(i0+1)-pp-2]) + z[4*(n0+1)+pp-2] = math.Min(math.Min(z[4*(n0+1)+pp-2], z[4*(i0+1)+pp-2]), z[4*(i0+1)+pp+2]) + z[4*(n0+1)-pp-1] = math.Min(math.Min(z[4*(n0+1)-pp-1], z[4*(i0+1)-pp-1]), z[4*(i0+1)-pp+3]) + qmax = math.Max(math.Max(qmax, z[4*(i0+1)+pp-4]), z[4*(i0+1)+pp]) + dmin = math.Copysign(0, -1) // Fortran code has -zero, but -0 in go is 0 + } + } + + // Choose a shift. + tau, ttype, g = impl.Dlasq4(i0, n0, z, pp, n0in, dmin, dmin1, dmin2, dn, dn1, dn2, tau, ttype, g) + + // Call dqds until dmin > 0. +loop: + for { + i0, n0, pp, tau, sigma, dmin, dmin1, dmin2, dn, dn1, dn2 = impl.Dlasq5(i0, n0, z, pp, tau, sigma) + + nDiv += n0 - i0 + 2 + iter++ + switch { + case dmin >= 0 && dmin1 >= 0: + // Success. + goto done + + case dmin < 0 && dmin1 > 0 && z[4*n0-pp-1] < tol*(sigma+dn1) && math.Abs(dn) < tol*sigma: + // Convergence hidden by negative dn. + z[4*n0-pp+1] = 0 + dmin = 0 + goto done + + case dmin < 0: + // Tau too big. Select new Tau and try again. + nFail++ + if ttype < -22 { + // Failed twice. Play it safe. + tau = 0 + } else if dmin1 > 0 { + // Late failure. Gives excellent shift. + tau = (tau + dmin) * (1 - 2*eps) + ttype -= 11 + } else { + // Early failure. Divide by 4. 
+ tau = tau / 4 + ttype -= 12 + } + + case math.IsNaN(dmin): + if tau == 0 { + break loop + } + tau = 0 + + default: + // Possible underflow. Play it safe. + break loop + } + } + + // Risk of underflow. + dmin, dmin1, dmin2, dn, dn1, dn2 = impl.Dlasq6(i0, n0, z, pp) + nDiv += n0 - i0 + 2 + iter++ + tau = 0 + +done: + if tau < sigma { + desig += tau + t = sigma + desig + desig -= t - sigma + } else { + t = sigma + tau + desig += sigma - (t - tau) + } + sigma = t + return i0, n0, pp, dmin, sigma, desig, qmax, nFail, iter, nDiv, ttype, dmin1, dmin2, dn, dn1, dn2, g, tau +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq4.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq4.go new file mode 100644 index 0000000000..f6dbb31b98 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq4.go @@ -0,0 +1,249 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlasq4 computes an approximation to the smallest eigenvalue using values of d +// from the previous transform. +// i0, n0, and n0in are zero-indexed. +// +// Dlasq4 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlasq4(i0, n0 int, z []float64, pp int, n0in int, dmin, dmin1, dmin2, dn, dn1, dn2, tau float64, ttype int, g float64) (tauOut float64, ttypeOut int, gOut float64) { + switch { + case i0 < 0: + panic(i0LT0) + case n0 < 0: + panic(n0LT0) + case len(z) < 4*n0: + panic(shortZ) + case pp != 0 && pp != 1: + panic(badPp) + } + + const ( + cnst1 = 0.563 + cnst2 = 1.01 + cnst3 = 1.05 + + cnstthird = 0.333 // TODO(btracey): Fix? + ) + // A negative dmin forces the shift to take that absolute value + // ttype records the type of shift. + if dmin <= 0 { + tau = -dmin + ttype = -1 + return tau, ttype, g + } + nn := 4*(n0+1) + pp - 1 // -1 for zero indexing + s := math.NaN() // Poison s so that failure to take a path below is obvious + if n0in == n0 { + // No eigenvalues deflated. + if dmin == dn || dmin == dn1 { + b1 := math.Sqrt(z[nn-3]) * math.Sqrt(z[nn-5]) + b2 := math.Sqrt(z[nn-7]) * math.Sqrt(z[nn-9]) + a2 := z[nn-7] + z[nn-5] + if dmin == dn && dmin1 == dn1 { + gap2 := dmin2 - a2 - dmin2/4 + var gap1 float64 + if gap2 > 0 && gap2 > b2 { + gap1 = a2 - dn - (b2/gap2)*b2 + } else { + gap1 = a2 - dn - (b1 + b2) + } + if gap1 > 0 && gap1 > b1 { + s = math.Max(dn-(b1/gap1)*b1, 0.5*dmin) + ttype = -2 + } else { + s = 0 + if dn > b1 { + s = dn - b1 + } + if a2 > b1+b2 { + s = math.Min(s, a2-(b1+b2)) + } + s = math.Max(s, cnstthird*dmin) + ttype = -3 + } + } else { + ttype = -4 + s = dmin / 4 + var gam float64 + var np int + if dmin == dn { + gam = dn + a2 = 0 + if z[nn-5] > z[nn-7] { + return tau, ttype, g + } + b2 = z[nn-5] / z[nn-7] + np = nn - 9 + } else { + np = nn - 2*pp + gam = dn1 + if z[np-4] > z[np-2] { + return tau, ttype, g + } + a2 = z[np-4] / z[np-2] + if z[nn-9] > z[nn-11] { + return tau, ttype, g + } + b2 = z[nn-9] / z[nn-11] + np = nn - 13 + } + // Approximate contribution to norm squared from i < nn-1. + a2 += b2 + for i4loop := np + 1; i4loop >= 4*(i0+1)-1+pp; i4loop -= 4 { + i4 := i4loop - 1 + if b2 == 0 { + break + } + b1 = b2 + if z[i4] > z[i4-2] { + return tau, ttype, g + } + b2 *= z[i4] / z[i4-2] + a2 += b2 + if 100*math.Max(b2, b1) < a2 || cnst1 < a2 { + break + } + } + a2 *= cnst3 + // Rayleigh quotient residual bound. 
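+				// Here a2 bounds the squared residual of gam as an
+				// approximate eigenvalue; when the bound is small enough
+				// (a2 < cnst1), this sharper shift replaces the dmin/4
+				// default chosen above.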
+ if a2 < cnst1 { + s = gam * (1 - math.Sqrt(a2)) / (1 + a2) + } + } + } else if dmin == dn2 { + ttype = -5 + s = dmin / 4 + // Compute contribution to norm squared from i > nn-2. + np := nn - 2*pp + b1 := z[np-2] + b2 := z[np-6] + gam := dn2 + if z[np-8] > b2 || z[np-4] > b1 { + return tau, ttype, g + } + a2 := (z[np-8] / b2) * (1 + z[np-4]/b1) + // Approximate contribution to norm squared from i < nn-2. + if n0-i0 > 2 { + b2 = z[nn-13] / z[nn-15] + a2 += b2 + for i4loop := (nn + 1) - 17; i4loop >= 4*(i0+1)-1+pp; i4loop -= 4 { + i4 := i4loop - 1 + if b2 == 0 { + break + } + b1 = b2 + if z[i4] > z[i4-2] { + return tau, ttype, g + } + b2 *= z[i4] / z[i4-2] + a2 += b2 + if 100*math.Max(b2, b1) < a2 || cnst1 < a2 { + break + } + } + a2 *= cnst3 + } + if a2 < cnst1 { + s = gam * (1 - math.Sqrt(a2)) / (1 + a2) + } + } else { + // Case 6, no information to guide us. + if ttype == -6 { + g += cnstthird * (1 - g) + } else if ttype == -18 { + g = cnstthird / 4 + } else { + g = 1.0 / 4 + } + s = g * dmin + ttype = -6 + } + } else if n0in == (n0 + 1) { + // One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. + if dmin1 == dn1 && dmin2 == dn2 { + ttype = -7 + s = cnstthird * dmin1 + if z[nn-5] > z[nn-7] { + return tau, ttype, g + } + b1 := z[nn-5] / z[nn-7] + b2 := b1 + if b2 != 0 { + for i4loop := 4*(n0+1) - 9 + pp; i4loop >= 4*(i0+1)-1+pp; i4loop -= 4 { + i4 := i4loop - 1 + a2 := b1 + if z[i4] > z[i4-2] { + return tau, ttype, g + } + b1 *= z[i4] / z[i4-2] + b2 += b1 + if 100*math.Max(b1, a2) < b2 { + break + } + } + } + b2 = math.Sqrt(cnst3 * b2) + a2 := dmin1 / (1 + b2*b2) + gap2 := 0.5*dmin2 - a2 + if gap2 > 0 && gap2 > b2*a2 { + s = math.Max(s, a2*(1-cnst2*a2*(b2/gap2)*b2)) + } else { + s = math.Max(s, a2*(1-cnst2*b2)) + ttype = -8 + } + } else { + s = dmin1 / 4 + if dmin1 == dn1 { + s = 0.5 * dmin1 + } + ttype = -9 + } + } else if n0in == (n0 + 2) { + // Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. + if dmin2 == dn2 && 2*z[nn-5] < z[nn-7] { + ttype = -10 + s = cnstthird * dmin2 + if z[nn-5] > z[nn-7] { + return tau, ttype, g + } + b1 := z[nn-5] / z[nn-7] + b2 := b1 + if b2 != 0 { + for i4loop := 4*(n0+1) - 9 + pp; i4loop >= 4*(i0+1)-1+pp; i4loop -= 4 { + i4 := i4loop - 1 + if z[i4] > z[i4-2] { + return tau, ttype, g + } + b1 *= z[i4] / z[i4-2] + b2 += b1 + if 100*b1 < b2 { + break + } + } + } + b2 = math.Sqrt(cnst3 * b2) + a2 := dmin2 / (1 + b2*b2) + gap2 := z[nn-7] + z[nn-9] - math.Sqrt(z[nn-11])*math.Sqrt(z[nn-9]) - a2 + if gap2 > 0 && gap2 > b2*a2 { + s = math.Max(s, a2*(1-cnst2*a2*(b2/gap2)*b2)) + } else { + s = math.Max(s, a2*(1-cnst2*b2)) + } + } else { + s = dmin2 / 4 + ttype = -11 + } + } else if n0in > n0+2 { + // Case 12, more than two eigenvalues deflated. No information. + s = 0 + ttype = -12 + } + tau = s + return tau, ttype, g +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq5.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq5.go new file mode 100644 index 0000000000..d3826d9186 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq5.go @@ -0,0 +1,140 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlasq5 computes one dqds transform in ping-pong form. +// i0 and n0 are zero-indexed. +// +// Dlasq5 is an internal routine. It is exported for testing purposes. 
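+//
+// One call advances the factorization by a single shifted dqds step: the qd
+// array z is read in the layout selected by pp (the ping-pong storage) and
+// written back in the other layout with the shift tau folded into the
+// running d values. The returned dmin, dmin1, dmin2, dn, dnm1 and dnm2 feed
+// the next shift selection in Dlasq4.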
+func (impl Implementation) Dlasq5(i0, n0 int, z []float64, pp int, tau, sigma float64) (i0Out, n0Out, ppOut int, tauOut, sigmaOut, dmin, dmin1, dmin2, dn, dnm1, dnm2 float64) { + // The lapack function has inputs for ieee and eps, but Go requires ieee so + // these are unnecessary. + + switch { + case i0 < 0: + panic(i0LT0) + case n0 < 0: + panic(n0LT0) + case len(z) < 4*n0: + panic(shortZ) + case pp != 0 && pp != 1: + panic(badPp) + } + + if n0-i0-1 <= 0 { + return i0, n0, pp, tau, sigma, dmin, dmin1, dmin2, dn, dnm1, dnm2 + } + + eps := dlamchP + dthresh := eps * (sigma + tau) + if tau < dthresh*0.5 { + tau = 0 + } + var j4 int + var emin float64 + if tau != 0 { + j4 = 4*i0 + pp + emin = z[j4+4] + d := z[j4] - tau + dmin = d + // In the reference there are code paths that actually return this value. + // dmin1 = -z[j4] + if pp == 0 { + for j4loop := 4 * (i0 + 1); j4loop <= 4*((n0+1)-3); j4loop += 4 { + j4 := j4loop - 1 + z[j4-2] = d + z[j4-1] + tmp := z[j4+1] / z[j4-2] + d = d*tmp - tau + dmin = math.Min(dmin, d) + z[j4] = z[j4-1] * tmp + emin = math.Min(z[j4], emin) + } + } else { + for j4loop := 4 * (i0 + 1); j4loop <= 4*((n0+1)-3); j4loop += 4 { + j4 := j4loop - 1 + z[j4-3] = d + z[j4] + tmp := z[j4+2] / z[j4-3] + d = d*tmp - tau + dmin = math.Min(dmin, d) + z[j4-1] = z[j4] * tmp + emin = math.Min(z[j4-1], emin) + } + } + // Unroll the last two steps. + dnm2 = d + dmin2 = dmin + j4 = 4*((n0+1)-2) - pp - 1 + j4p2 := j4 + 2*pp - 1 + z[j4-2] = dnm2 + z[j4p2] + z[j4] = z[j4p2+2] * (z[j4p2] / z[j4-2]) + dnm1 = z[j4p2+2]*(dnm2/z[j4-2]) - tau + dmin = math.Min(dmin, dnm1) + + dmin1 = dmin + j4 += 4 + j4p2 = j4 + 2*pp - 1 + z[j4-2] = dnm1 + z[j4p2] + z[j4] = z[j4p2+2] * (z[j4p2] / z[j4-2]) + dn = z[j4p2+2]*(dnm1/z[j4-2]) - tau + dmin = math.Min(dmin, dn) + } else { + // This is the version that sets d's to zero if they are small enough. + j4 = 4*(i0+1) + pp - 4 + emin = z[j4+4] + d := z[j4] - tau + dmin = d + // In the reference there are code paths that actually return this value. + // dmin1 = -z[j4] + if pp == 0 { + for j4loop := 4 * (i0 + 1); j4loop <= 4*((n0+1)-3); j4loop += 4 { + j4 := j4loop - 1 + z[j4-2] = d + z[j4-1] + tmp := z[j4+1] / z[j4-2] + d = d*tmp - tau + if d < dthresh { + d = 0 + } + dmin = math.Min(dmin, d) + z[j4] = z[j4-1] * tmp + emin = math.Min(z[j4], emin) + } + } else { + for j4loop := 4 * (i0 + 1); j4loop <= 4*((n0+1)-3); j4loop += 4 { + j4 := j4loop - 1 + z[j4-3] = d + z[j4] + tmp := z[j4+2] / z[j4-3] + d = d*tmp - tau + if d < dthresh { + d = 0 + } + dmin = math.Min(dmin, d) + z[j4-1] = z[j4] * tmp + emin = math.Min(z[j4-1], emin) + } + } + // Unroll the last two steps. + dnm2 = d + dmin2 = dmin + j4 = 4*((n0+1)-2) - pp - 1 + j4p2 := j4 + 2*pp - 1 + z[j4-2] = dnm2 + z[j4p2] + z[j4] = z[j4p2+2] * (z[j4p2] / z[j4-2]) + dnm1 = z[j4p2+2]*(dnm2/z[j4-2]) - tau + dmin = math.Min(dmin, dnm1) + + dmin1 = dmin + j4 += 4 + j4p2 = j4 + 2*pp - 1 + z[j4-2] = dnm1 + z[j4p2] + z[j4] = z[j4p2+2] * (z[j4p2] / z[j4-2]) + dn = z[j4p2+2]*(dnm1/z[j4-2]) - tau + dmin = math.Min(dmin, dn) + } + z[j4+2] = dn + z[4*(n0+1)-pp-1] = emin + return i0, n0, pp, tau, sigma, dmin, dmin1, dmin2, dn, dnm1, dnm2 +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq6.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq6.go new file mode 100644 index 0000000000..54bf587562 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq6.go @@ -0,0 +1,118 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlasq6 computes one dqd transform in ping-pong form with protection against +// overflow and underflow. z has length at least 4*(n0+1) and holds the qd array. +// i0 is the zero-based first index. +// n0 is the zero-based last index. +// +// Dlasq6 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlasq6(i0, n0 int, z []float64, pp int) (dmin, dmin1, dmin2, dn, dnm1, dnm2 float64) { + switch { + case i0 < 0: + panic(i0LT0) + case n0 < 0: + panic(n0LT0) + case len(z) < 4*n0: + panic(shortZ) + case pp != 0 && pp != 1: + panic(badPp) + } + + if n0-i0-1 <= 0 { + return dmin, dmin1, dmin2, dn, dnm1, dnm2 + } + + safmin := dlamchS + j4 := 4*(i0+1) + pp - 4 // -4 rather than -3 for zero indexing + emin := z[j4+4] + d := z[j4] + dmin = d + if pp == 0 { + for j4loop := 4 * (i0 + 1); j4loop <= 4*((n0+1)-3); j4loop += 4 { + j4 := j4loop - 1 // Translate back to zero-indexed. + z[j4-2] = d + z[j4-1] + if z[j4-2] == 0 { + z[j4] = 0 + d = z[j4+1] + dmin = d + emin = 0 + } else if safmin*z[j4+1] < z[j4-2] && safmin*z[j4-2] < z[j4+1] { + tmp := z[j4+1] / z[j4-2] + z[j4] = z[j4-1] * tmp + d *= tmp + } else { + z[j4] = z[j4+1] * (z[j4-1] / z[j4-2]) + d = z[j4+1] * (d / z[j4-2]) + } + dmin = math.Min(dmin, d) + emin = math.Min(emin, z[j4]) + } + } else { + for j4loop := 4 * (i0 + 1); j4loop <= 4*((n0+1)-3); j4loop += 4 { + j4 := j4loop - 1 + z[j4-3] = d + z[j4] + if z[j4-3] == 0 { + z[j4-1] = 0 + d = z[j4+2] + dmin = d + emin = 0 + } else if safmin*z[j4+2] < z[j4-3] && safmin*z[j4-3] < z[j4+2] { + tmp := z[j4+2] / z[j4-3] + z[j4-1] = z[j4] * tmp + d *= tmp + } else { + z[j4-1] = z[j4+2] * (z[j4] / z[j4-3]) + d = z[j4+2] * (d / z[j4-3]) + } + dmin = math.Min(dmin, d) + emin = math.Min(emin, z[j4-1]) + } + } + // Unroll last two steps. + dnm2 = d + dmin2 = dmin + j4 = 4*(n0-1) - pp - 1 + j4p2 := j4 + 2*pp - 1 + z[j4-2] = dnm2 + z[j4p2] + if z[j4-2] == 0 { + z[j4] = 0 + dnm1 = z[j4p2+2] + dmin = dnm1 + emin = 0 + } else if safmin*z[j4p2+2] < z[j4-2] && safmin*z[j4-2] < z[j4p2+2] { + tmp := z[j4p2+2] / z[j4-2] + z[j4] = z[j4p2] * tmp + dnm1 = dnm2 * tmp + } else { + z[j4] = z[j4p2+2] * (z[j4p2] / z[j4-2]) + dnm1 = z[j4p2+2] * (dnm2 / z[j4-2]) + } + dmin = math.Min(dmin, dnm1) + dmin1 = dmin + j4 += 4 + j4p2 = j4 + 2*pp - 1 + z[j4-2] = dnm1 + z[j4p2] + if z[j4-2] == 0 { + z[j4] = 0 + dn = z[j4p2+2] + dmin = dn + emin = 0 + } else if safmin*z[j4p2+2] < z[j4-2] && safmin*z[j4-2] < z[j4p2+2] { + tmp := z[j4p2+2] / z[j4-2] + z[j4] = z[j4p2] * tmp + dn = dnm1 * tmp + } else { + z[j4] = z[j4p2+2] * (z[j4p2] / z[j4-2]) + dn = z[j4p2+2] * (dnm1 / z[j4-2]) + } + dmin = math.Min(dmin, dn) + z[j4+2] = dn + z[4*(n0+1)-pp-1] = emin + return dmin, dmin1, dmin2, dn, dnm1, dnm2 +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasr.go new file mode 100644 index 0000000000..3aab41f8e3 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasr.go @@ -0,0 +1,287 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dlasr applies a sequence of plane rotations to the m×n matrix A. This series +// of plane rotations is implicitly represented by a matrix P. 
P is applied to A
+// on the side given by side -- A = P * A if side == lapack.Left,
+// A = A * Pᵀ if side == lapack.Right.
+//
+// The exact value of P depends on the value of pivot, but in all cases P is
+// implicitly represented by a series of 2×2 rotation matrices. The entries of
+// rotation matrix k are defined by s[k] and c[k]
+//
+//	R(k) = [ c[k] s[k]]
+//	       [-s[k] c[k]]
+//
+// If direct == lapack.Forward, the rotation matrices are applied as
+// P = P(z-1) * ... * P(2) * P(1), while if direct == lapack.Backward they are
+// applied as P = P(1) * P(2) * ... * P(z-1).
+//
+// pivot defines the mapping of the elements in R(k) to P(k).
+// If pivot == lapack.Variable, the rotation is performed for the (k, k+1) plane.
+//
+//	P(k) = [1                        ]
+//	       [    ...                  ]
+//	       [         1               ]
+//	       [           c[k]  s[k]    ]
+//	       [          -s[k]  c[k]    ]
+//	       [                      1  ]
+//	       [                    ...  ]
+//	       [                        1]
+//
+// if pivot == lapack.Top, the rotation is performed for the (1, k+1) plane,
+//
+//	P(k) = [c[k]        s[k]         ]
+//	       [     1                   ]
+//	       [       ...               ]
+//	       [           1             ]
+//	       [-s[k]       c[k]         ]
+//	       [                  1      ]
+//	       [                    ...  ]
+//	       [                        1]
+//
+// and if pivot == lapack.Bottom, the rotation is performed for the (k, z) plane.
+//
+//	P(k) = [1                        ]
+//	       [  ...                    ]
+//	       [      1                  ]
+//	       [        c[k]        s[k] ]
+//	       [              1          ]
+//	       [                ...      ]
+//	       [                    1    ]
+//	       [       -s[k]        c[k] ]
+//
+// s and c have length m - 1 if side == blas.Left, and n - 1 if side == blas.Right.
+//
+// Dlasr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasr(side blas.Side, pivot lapack.Pivot, direct lapack.Direct, m, n int, c, s, a []float64, lda int) {
+	switch {
+	case side != blas.Left && side != blas.Right:
+		panic(badSide)
+	case pivot != lapack.Variable && pivot != lapack.Top && pivot != lapack.Bottom:
+		panic(badPivot)
+	case direct != lapack.Forward && direct != lapack.Backward:
+		panic(badDirect)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if m == 0 || n == 0 {
+		return
+	}
+
+	if side == blas.Left {
+		if len(c) < m-1 {
+			panic(shortC)
+		}
+		if len(s) < m-1 {
+			panic(shortS)
+		}
+	} else {
+		if len(c) < n-1 {
+			panic(shortC)
+		}
+		if len(s) < n-1 {
+			panic(shortS)
+		}
+	}
+	if len(a) < (m-1)*lda+n {
+		panic(shortA)
+	}
+
+	if side == blas.Left {
+		if pivot == lapack.Variable {
+			if direct == lapack.Forward {
+				for j := 0; j < m-1; j++ {
+					ctmp := c[j]
+					stmp := s[j]
+					if ctmp != 1 || stmp != 0 {
+						for i := 0; i < n; i++ {
+							tmp2 := a[j*lda+i]
+							tmp := a[(j+1)*lda+i]
+							a[(j+1)*lda+i] = ctmp*tmp - stmp*tmp2
+							a[j*lda+i] = stmp*tmp + ctmp*tmp2
+						}
+					}
+				}
+				return
+			}
+			for j := m - 2; j >= 0; j-- {
+				ctmp := c[j]
+				stmp := s[j]
+				if ctmp != 1 || stmp != 0 {
+					for i := 0; i < n; i++ {
+						tmp2 := a[j*lda+i]
+						tmp := a[(j+1)*lda+i]
+						a[(j+1)*lda+i] = ctmp*tmp - stmp*tmp2
+						a[j*lda+i] = stmp*tmp + ctmp*tmp2
+					}
+				}
+			}
+			return
+		} else if pivot == lapack.Top {
+			if direct == lapack.Forward {
+				for j := 1; j < m; j++ {
+					ctmp := c[j-1]
+					stmp := s[j-1]
+					if ctmp != 1 || stmp != 0 {
+						for i := 0; i < n; i++ {
+							tmp := a[j*lda+i]
+							tmp2 := a[i]
+							a[j*lda+i] = ctmp*tmp - stmp*tmp2
+							a[i] = stmp*tmp + ctmp*tmp2
+						}
+					}
+				}
+				return
+			}
+			for j := m - 1; j >= 1; j-- {
+				ctmp := c[j-1]
+				stmp := s[j-1]
+				if ctmp != 1 || stmp != 0 {
+					for i := 0; i < n; i++ {
+						tmp := a[j*lda+i]
+						tmp2 := a[i]
+						a[j*lda+i] = ctmp*tmp - stmp*tmp2
+						a[i] = stmp*tmp + ctmp*tmp2
+					}
+				}
+			}
+			return
+		}
+		if direct == lapack.Forward {
+			for j := 0; j < m-1; j++ {
+				ctmp := c[j]
+				stmp := s[j]
+				if ctmp != 1 || stmp != 0 {
+					for i := 0; i < n; i++ {
+						tmp := a[j*lda+i]
+						tmp2 := a[(m-1)*lda+i]
+						a[j*lda+i] = stmp*tmp2 + ctmp*tmp
+						a[(m-1)*lda+i] = ctmp*tmp2 - stmp*tmp
+					}
+				}
+			}
+			return
+		}
+		for j := m - 2; j >= 0; j-- {
+			ctmp := c[j]
+			stmp := s[j]
+			if ctmp != 1 || stmp != 0 {
+				for i := 0; i < n; i++ {
+					tmp := a[j*lda+i]
+					tmp2 := a[(m-1)*lda+i]
+					a[j*lda+i] = stmp*tmp2 + ctmp*tmp
+					a[(m-1)*lda+i] = ctmp*tmp2 - stmp*tmp
+				}
+			}
+		}
+		return
+	}
+	if pivot == lapack.Variable {
+		if direct == lapack.Forward {
+			for j := 0; j < n-1; j++ {
+				ctmp := c[j]
+				stmp := s[j]
+				if ctmp != 1 || stmp != 0 {
+					for i := 0; i < m; i++ {
+						tmp := a[i*lda+j+1]
+						tmp2 := a[i*lda+j]
+						a[i*lda+j+1] = ctmp*tmp - stmp*tmp2
+						a[i*lda+j] = stmp*tmp + ctmp*tmp2
+					}
+				}
+			}
+			return
+		}
+		for j := n - 2; j >= 0; j-- {
+			ctmp := c[j]
+			stmp := s[j]
+			if ctmp != 1 || stmp != 0 {
+				for i := 0; i < m; i++ {
+					tmp := a[i*lda+j+1]
+					tmp2 := a[i*lda+j]
+					a[i*lda+j+1] = ctmp*tmp - stmp*tmp2
+					a[i*lda+j] = stmp*tmp + ctmp*tmp2
+				}
+			}
+		}
+		return
+	} else if pivot == lapack.Top {
+		if direct == lapack.Forward {
+			for j := 1; j < n; j++ {
+				ctmp := c[j-1]
+				stmp := s[j-1]
+				if ctmp != 1 || stmp != 0 {
+					for i := 0; i < m; i++ {
+						tmp := a[i*lda+j]
+						tmp2 := a[i*lda]
+						a[i*lda+j] = ctmp*tmp - stmp*tmp2
+						a[i*lda] = stmp*tmp + ctmp*tmp2
+					}
+				}
+			}
+			return
+		}
+		for j := n - 1; j >= 1; j-- {
+			ctmp := c[j-1]
+			stmp := s[j-1]
+			if ctmp != 1 || stmp != 0 {
+				for i := 0; i < m; i++ {
+					tmp := a[i*lda+j]
+					tmp2 := a[i*lda]
+					a[i*lda+j] = ctmp*tmp - stmp*tmp2
+					a[i*lda] = stmp*tmp + ctmp*tmp2
+				}
+			}
+		}
+		return
+	}
+	if direct == lapack.Forward {
+		for j := 0; j < n-1; j++ {
+			ctmp := c[j]
+			stmp := s[j]
+			if ctmp != 1 || stmp != 0 {
+				for i := 0; i < m; i++ {
+					tmp := a[i*lda+j]
+					tmp2 := a[i*lda+n-1]
+					a[i*lda+j] = stmp*tmp2 + ctmp*tmp
+					a[i*lda+n-1] = ctmp*tmp2 - stmp*tmp
+				}
+			}
+		}
+		
return + } + for j := n - 2; j >= 0; j-- { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < m; i++ { + tmp := a[i*lda+j] + tmp2 := a[i*lda+n-1] + a[i*lda+j] = stmp*tmp2 + ctmp*tmp + a[i*lda+n-1] = ctmp*tmp2 - stmp*tmp + } + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasrt.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasrt.go new file mode 100644 index 0000000000..be472805bf --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasrt.go @@ -0,0 +1,36 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "sort" + + "gonum.org/v1/gonum/lapack" +) + +// Dlasrt sorts the numbers in the input slice d. If s == lapack.SortIncreasing, +// the elements are sorted in increasing order. If s == lapack.SortDecreasing, +// the elements are sorted in decreasing order. For other values of s Dlasrt +// will panic. +// +// Dlasrt is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlasrt(s lapack.Sort, n int, d []float64) { + switch { + case n < 0: + panic(nLT0) + case len(d) < n: + panic(shortD) + } + + d = d[:n] + switch s { + default: + panic(badSort) + case lapack.SortIncreasing: + sort.Float64s(d) + case lapack.SortDecreasing: + sort.Sort(sort.Reverse(sort.Float64Slice(d))) + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlassq.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlassq.go new file mode 100644 index 0000000000..8f8cda8789 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlassq.go @@ -0,0 +1,131 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Dlassq updates a sum of squares represented in scaled form. Dlassq returns +// the values scl and smsq such that +// +// scl^2*smsq = X[0]^2 + ... + X[n-1]^2 + scale^2*sumsq +// +// The value of sumsq is assumed to be non-negative. +// +// Dlassq is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlassq(n int, x []float64, incx int, scale float64, sumsq float64) (scl, smsq float64) { + // Implementation based on Supplemental Material to: + // Edward Anderson. 2017. Algorithm 978: Safe Scaling in the Level 1 BLAS. + // ACM Trans. Math. Softw. 44, 1, Article 12 (July 2017), 28 pages. 
+	// DOI: https://doi.org/10.1145/3061665
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case incx <= 0:
+		panic(badIncX)
+	case len(x) < 1+(n-1)*incx:
+		panic(shortX)
+	}
+
+	if math.IsNaN(scale) || math.IsNaN(sumsq) {
+		return scale, sumsq
+	}
+
+	if sumsq == 0 {
+		scale = 1
+	}
+	if scale == 0 {
+		scale = 1
+		sumsq = 0
+	}
+
+	if n == 0 {
+		return scale, sumsq
+	}
+
+	// Compute the sum of squares in 3 accumulators:
+	//  - abig: sum of squares scaled down to avoid overflow
+	//  - asml: sum of squares scaled up to avoid underflow
+	//  - amed: sum of squares that do not require scaling
+	// The thresholds and multipliers are:
+	//  - values bigger than dtbig are scaled down by dsbig
+	//  - values smaller than dtsml are scaled up by dssml
+	var (
+		isBig            bool
+		asml, amed, abig float64
+	)
+	for i, ix := 0, 0; i < n; i++ {
+		ax := math.Abs(x[ix])
+		switch {
+		case ax > dtbig:
+			ax *= dsbig
+			abig += ax * ax
+			isBig = true
+		case ax < dtsml:
+			if !isBig {
+				ax *= dssml
+				asml += ax * ax
+			}
+		default:
+			amed += ax * ax
+		}
+		ix += incx
+	}
+	// Put the existing sum of squares into one of the accumulators.
+	if sumsq > 0 {
+		ax := scale * math.Sqrt(sumsq)
+		switch {
+		case ax > dtbig:
+			if scale > 1 {
+				scale *= dsbig
+				abig += scale * (scale * sumsq)
+			} else {
+				// sumsq > dtbig^2 => (dsbig * (dsbig * sumsq)) is representable.
+				abig += scale * (scale * (dsbig * (dsbig * sumsq)))
+			}
+		case ax < dtsml:
+			if !isBig {
+				if scale < 1 {
+					scale *= dssml
+					asml += scale * (scale * sumsq)
+				} else {
+					// sumsq < dtsml^2 => (dssml * (dssml * sumsq)) is representable.
+					asml += scale * (scale * (dssml * (dssml * sumsq)))
+				}
+			}
+		default:
+			amed += scale * (scale * sumsq)
+		}
+	}
+	// Combine abig and amed or amed and asml if more than one accumulator was
+	// used.
+	switch {
+	case abig > 0:
+		// Combine abig and amed:
+		if amed > 0 || math.IsNaN(amed) {
+			abig += (amed * dsbig) * dsbig
+		}
+		scale = 1 / dsbig
+		sumsq = abig
+	case asml > 0:
+		// Combine amed and asml:
+		if amed > 0 || math.IsNaN(amed) {
+			amed = math.Sqrt(amed)
+			asml = math.Sqrt(asml) / dssml
+			ymin, ymax := asml, amed
+			if asml > amed {
+				ymin, ymax = amed, asml
+			}
+			scale = 1
+			sumsq = ymax * ymax * (1 + (ymin/ymax)*(ymin/ymax))
+		} else {
+			scale = 1 / dssml
+			sumsq = asml
+		}
+	default:
+		scale = 1
+		sumsq = amed
+	}
+	return scale, sumsq
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasv2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasv2.go
new file mode 100644
index 0000000000..cc7ceea0b8
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasv2.go
@@ -0,0 +1,117 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlasv2 computes the singular value decomposition of a 2×2 matrix.
+//
+//	[ csl snl] [f g] [csr -snr] = [ssmax     0]
+//	[-snl csl] [0 h] [snr  csr] = [    0 ssmin]
+//
+// ssmax is the larger absolute singular value, and ssmin is the smaller absolute
+// singular value. [csl, snl] and [csr, snr] are the left and right singular vectors.
+//
+// Dlasv2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasv2(f, g, h float64) (ssmin, ssmax, snr, csr, snl, csl float64) {
+	ft := f
+	fa := math.Abs(ft)
+	ht := h
+	ha := math.Abs(h)
+	// pmax points to the largest element of the matrix in terms of absolute value.
+	// 1 if F, 2 if G, 3 if H.
+ pmax := 1 + swap := ha > fa + if swap { + pmax = 3 + ft, ht = ht, ft + fa, ha = ha, fa + } + gt := g + ga := math.Abs(gt) + var clt, crt, slt, srt float64 + if ga == 0 { + ssmin = ha + ssmax = fa + clt = 1 + crt = 1 + slt = 0 + srt = 0 + } else { + gasmall := true + if ga > fa { + pmax = 2 + if (fa / ga) < dlamchE { + gasmall = false + ssmax = ga + if ha > 1 { + ssmin = fa / (ga / ha) + } else { + ssmin = (fa / ga) * ha + } + clt = 1 + slt = ht / gt + srt = 1 + crt = ft / gt + } + } + if gasmall { + d := fa - ha + l := d / fa + if d == fa { // deal with inf + l = 1 + } + m := gt / ft + t := 2 - l + s := math.Hypot(t, m) + var r float64 + if l == 0 { + r = math.Abs(m) + } else { + r = math.Hypot(l, m) + } + a := 0.5 * (s + r) + ssmin = ha / a + ssmax = fa * a + if m == 0 { + if l == 0 { + t = math.Copysign(2, ft) * math.Copysign(1, gt) + } else { + t = gt/math.Copysign(d, ft) + m/t + } + } else { + t = (m/(s+t) + m/(r+l)) * (1 + a) + } + l = math.Hypot(t, 2) + crt = 2 / l + srt = t / l + clt = (crt + srt*m) / a + slt = (ht / ft) * srt / a + } + } + if swap { + csl = srt + snl = crt + csr = slt + snr = clt + } else { + csl = clt + snl = slt + csr = crt + snr = srt + } + var tsign float64 + switch pmax { + case 1: + tsign = math.Copysign(1, csr) * math.Copysign(1, csl) * math.Copysign(1, f) + case 2: + tsign = math.Copysign(1, snr) * math.Copysign(1, csl) * math.Copysign(1, g) + case 3: + tsign = math.Copysign(1, snr) * math.Copysign(1, snl) * math.Copysign(1, h) + } + ssmax = math.Copysign(ssmax, tsign) + ssmin = math.Copysign(ssmin, tsign*math.Copysign(1, f)*math.Copysign(1, h)) + return ssmin, ssmax, snr, csr, snl, csl +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaswp.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaswp.go new file mode 100644 index 0000000000..88600ac17b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaswp.go @@ -0,0 +1,58 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas/blas64" + +// Dlaswp swaps the rows k1 to k2 of a rectangular matrix A according to the +// indices in ipiv so that row k is swapped with ipiv[k]. +// +// n is the number of columns of A and incX is the increment for ipiv. If incX +// is 1, the swaps are applied from k1 to k2. If incX is -1, the swaps are +// applied in reverse order from k2 to k1. For other values of incX Dlaswp will +// panic. ipiv must have length k2+1, otherwise Dlaswp will panic. +// +// The indices k1, k2, and the elements of ipiv are zero-based. +// +// Dlaswp is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlaswp(n int, a []float64, lda int, k1, k2 int, ipiv []int, incX int) { + switch { + case n < 0: + panic(nLT0) + case k1 < 0: + panic(badK1) + case k2 < k1: + panic(badK2) + case lda < max(1, n): + panic(badLdA) + case len(a) < k2*lda+n: // A must have at least k2+1 rows. 
+ panic(shortA) + case len(ipiv) != k2+1: + panic(badLenIpiv) + case incX != 1 && incX != -1: + panic(absIncNotOne) + } + + if n == 0 { + return + } + + bi := blas64.Implementation() + if incX == 1 { + for k := k1; k <= k2; k++ { + if k == ipiv[k] { + continue + } + bi.Dswap(n, a[k*lda:], 1, a[ipiv[k]*lda:], 1) + } + return + } + for k := k2; k >= k1; k-- { + if k == ipiv[k] { + continue + } + bi.Dswap(n, a[k*lda:], 1, a[ipiv[k]*lda:], 1) + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasy2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasy2.go new file mode 100644 index 0000000000..160b68b84a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasy2.go @@ -0,0 +1,292 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlasy2 solves the Sylvester matrix equation where the matrices are of order 1 +// or 2. It computes the unknown n1×n2 matrix X so that +// +// TL*X + sgn*X*TR = scale*B if tranl == false and tranr == false, +// TLᵀ*X + sgn*X*TR = scale*B if tranl == true and tranr == false, +// TL*X + sgn*X*TRᵀ = scale*B if tranl == false and tranr == true, +// TLᵀ*X + sgn*X*TRᵀ = scale*B if tranl == true and tranr == true, +// +// where TL is n1×n1, TR is n2×n2, B is n1×n2, and 1 <= n1,n2 <= 2. +// +// isgn must be 1 or -1, and n1 and n2 must be 0, 1, or 2, but these conditions +// are not checked. +// +// Dlasy2 returns three values, a scale factor that is chosen less than or equal +// to 1 to prevent the solution overflowing, the infinity norm of the solution, +// and an indicator of success. If ok is false, TL and TR have eigenvalues that +// are too close, so TL or TR is perturbed to get a non-singular equation. +// +// Dlasy2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlasy2(tranl, tranr bool, isgn, n1, n2 int, tl []float64, ldtl int, tr []float64, ldtr int, b []float64, ldb int, x []float64, ldx int) (scale, xnorm float64, ok bool) { + // TODO(vladimir-ch): Add input validation checks conditionally skipped + // using the build tag mechanism. + + ok = true + // Quick return if possible. + if n1 == 0 || n2 == 0 { + return scale, xnorm, ok + } + + // Set constants to control overflow. + eps := dlamchP + smlnum := dlamchS / eps + sgn := float64(isgn) + + if n1 == 1 && n2 == 1 { + // 1×1 case: TL11*X + sgn*X*TR11 = B11. + tau1 := tl[0] + sgn*tr[0] + bet := math.Abs(tau1) + if bet <= smlnum { + tau1 = smlnum + bet = smlnum + ok = false + } + scale = 1 + gam := math.Abs(b[0]) + if smlnum*gam > bet { + scale = 1 / gam + } + x[0] = b[0] * scale / tau1 + xnorm = math.Abs(x[0]) + return scale, xnorm, ok + } + + if n1+n2 == 3 { + // 1×2 or 2×1 case. + var ( + smin float64 + tmp [4]float64 // tmp is used as a 2×2 row-major matrix. + btmp [2]float64 + ) + if n1 == 1 && n2 == 2 { + // 1×2 case: TL11*[X11 X12] + sgn*[X11 X12]*op[TR11 TR12] = [B11 B12]. 
+ // [TR21 TR22] + smin = math.Abs(tl[0]) + smin = math.Max(smin, math.Max(math.Abs(tr[0]), math.Abs(tr[1]))) + smin = math.Max(smin, math.Max(math.Abs(tr[ldtr]), math.Abs(tr[ldtr+1]))) + smin = math.Max(eps*smin, smlnum) + tmp[0] = tl[0] + sgn*tr[0] + tmp[3] = tl[0] + sgn*tr[ldtr+1] + if tranr { + tmp[1] = sgn * tr[1] + tmp[2] = sgn * tr[ldtr] + } else { + tmp[1] = sgn * tr[ldtr] + tmp[2] = sgn * tr[1] + } + btmp[0] = b[0] + btmp[1] = b[1] + } else { + // 2×1 case: op[TL11 TL12]*[X11] + sgn*[X11]*TR11 = [B11]. + // [TL21 TL22]*[X21] [X21] [B21] + smin = math.Abs(tr[0]) + smin = math.Max(smin, math.Max(math.Abs(tl[0]), math.Abs(tl[1]))) + smin = math.Max(smin, math.Max(math.Abs(tl[ldtl]), math.Abs(tl[ldtl+1]))) + smin = math.Max(eps*smin, smlnum) + tmp[0] = tl[0] + sgn*tr[0] + tmp[3] = tl[ldtl+1] + sgn*tr[0] + if tranl { + tmp[1] = tl[ldtl] + tmp[2] = tl[1] + } else { + tmp[1] = tl[1] + tmp[2] = tl[ldtl] + } + btmp[0] = b[0] + btmp[1] = b[ldb] + } + + // Solve 2×2 system using complete pivoting. + // Set pivots less than smin to smin. + + bi := blas64.Implementation() + ipiv := bi.Idamax(len(tmp), tmp[:], 1) + // Compute the upper triangular matrix [u11 u12]. + // [ 0 u22] + u11 := tmp[ipiv] + if math.Abs(u11) <= smin { + ok = false + u11 = smin + } + locu12 := [4]int{1, 0, 3, 2} // Index in tmp of the element on the same row as the pivot. + u12 := tmp[locu12[ipiv]] + locl21 := [4]int{2, 3, 0, 1} // Index in tmp of the element on the same column as the pivot. + l21 := tmp[locl21[ipiv]] / u11 + locu22 := [4]int{3, 2, 1, 0} // Index in tmp of the remaining element. + u22 := tmp[locu22[ipiv]] - l21*u12 + if math.Abs(u22) <= smin { + ok = false + u22 = smin + } + if ipiv&0x2 != 0 { // true for ipiv equal to 2 and 3. + // The pivot was in the second row, swap the elements of + // the right-hand side. + btmp[0], btmp[1] = btmp[1], btmp[0]-l21*btmp[1] + } else { + btmp[1] -= l21 * btmp[0] + } + scale = 1 + if 2*smlnum*math.Abs(btmp[1]) > math.Abs(u22) || 2*smlnum*math.Abs(btmp[0]) > math.Abs(u11) { + scale = 0.5 / math.Max(math.Abs(btmp[0]), math.Abs(btmp[1])) + btmp[0] *= scale + btmp[1] *= scale + } + // Solve the system [u11 u12] [x21] = [ btmp[0] ]. + // [ 0 u22] [x22] [ btmp[1] ] + x22 := btmp[1] / u22 + x21 := btmp[0]/u11 - (u12/u11)*x22 + if ipiv&0x1 != 0 { // true for ipiv equal to 1 and 3. + // The pivot was in the second column, swap the elements + // of the solution. + x21, x22 = x22, x21 + } + x[0] = x21 + if n1 == 1 { + x[1] = x22 + xnorm = math.Abs(x[0]) + math.Abs(x[1]) + } else { + x[ldx] = x22 + xnorm = math.Max(math.Abs(x[0]), math.Abs(x[ldx])) + } + return scale, xnorm, ok + } + + // 2×2 case: op[TL11 TL12]*[X11 X12] + SGN*[X11 X12]*op[TR11 TR12] = [B11 B12]. + // [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] + // + // Solve equivalent 4×4 system using complete pivoting. + // Set pivots less than smin to smin. 
+ + smin := math.Max(math.Abs(tr[0]), math.Abs(tr[1])) + smin = math.Max(smin, math.Max(math.Abs(tr[ldtr]), math.Abs(tr[ldtr+1]))) + smin = math.Max(smin, math.Max(math.Abs(tl[0]), math.Abs(tl[1]))) + smin = math.Max(smin, math.Max(math.Abs(tl[ldtl]), math.Abs(tl[ldtl+1]))) + smin = math.Max(eps*smin, smlnum) + + var t [4][4]float64 + t[0][0] = tl[0] + sgn*tr[0] + t[1][1] = tl[0] + sgn*tr[ldtr+1] + t[2][2] = tl[ldtl+1] + sgn*tr[0] + t[3][3] = tl[ldtl+1] + sgn*tr[ldtr+1] + if tranl { + t[0][2] = tl[ldtl] + t[1][3] = tl[ldtl] + t[2][0] = tl[1] + t[3][1] = tl[1] + } else { + t[0][2] = tl[1] + t[1][3] = tl[1] + t[2][0] = tl[ldtl] + t[3][1] = tl[ldtl] + } + if tranr { + t[0][1] = sgn * tr[1] + t[1][0] = sgn * tr[ldtr] + t[2][3] = sgn * tr[1] + t[3][2] = sgn * tr[ldtr] + } else { + t[0][1] = sgn * tr[ldtr] + t[1][0] = sgn * tr[1] + t[2][3] = sgn * tr[ldtr] + t[3][2] = sgn * tr[1] + } + + var btmp [4]float64 + btmp[0] = b[0] + btmp[1] = b[1] + btmp[2] = b[ldb] + btmp[3] = b[ldb+1] + + // Perform elimination. + var jpiv [4]int // jpiv records any column swaps for pivoting. + for i := 0; i < 3; i++ { + var ( + xmax float64 + ipsv, jpsv int + ) + for ip := i; ip < 4; ip++ { + for jp := i; jp < 4; jp++ { + if math.Abs(t[ip][jp]) >= xmax { + xmax = math.Abs(t[ip][jp]) + ipsv = ip + jpsv = jp + } + } + } + if ipsv != i { + // The pivot is not in the top row of the unprocessed + // block, swap rows ipsv and i of t and btmp. + t[ipsv], t[i] = t[i], t[ipsv] + btmp[ipsv], btmp[i] = btmp[i], btmp[ipsv] + } + if jpsv != i { + // The pivot is not in the left column of the + // unprocessed block, swap columns jpsv and i of t. + for k := 0; k < 4; k++ { + t[k][jpsv], t[k][i] = t[k][i], t[k][jpsv] + } + } + jpiv[i] = jpsv + if math.Abs(t[i][i]) < smin { + ok = false + t[i][i] = smin + } + for k := i + 1; k < 4; k++ { + t[k][i] /= t[i][i] + btmp[k] -= t[k][i] * btmp[i] + for j := i + 1; j < 4; j++ { + t[k][j] -= t[k][i] * t[i][j] + } + } + } + if math.Abs(t[3][3]) < smin { + ok = false + t[3][3] = smin + } + scale = 1 + if 8*smlnum*math.Abs(btmp[0]) > math.Abs(t[0][0]) || + 8*smlnum*math.Abs(btmp[1]) > math.Abs(t[1][1]) || + 8*smlnum*math.Abs(btmp[2]) > math.Abs(t[2][2]) || + 8*smlnum*math.Abs(btmp[3]) > math.Abs(t[3][3]) { + + maxbtmp := math.Max(math.Abs(btmp[0]), math.Abs(btmp[1])) + maxbtmp = math.Max(maxbtmp, math.Max(math.Abs(btmp[2]), math.Abs(btmp[3]))) + scale = (1.0 / 8.0) / maxbtmp + btmp[0] *= scale + btmp[1] *= scale + btmp[2] *= scale + btmp[3] *= scale + } + // Compute the solution of the upper triangular system t * tmp = btmp. + var tmp [4]float64 + for i := 3; i >= 0; i-- { + temp := 1 / t[i][i] + tmp[i] = btmp[i] * temp + for j := i + 1; j < 4; j++ { + tmp[i] -= temp * t[i][j] * tmp[j] + } + } + for i := 2; i >= 0; i-- { + if jpiv[i] != i { + tmp[i], tmp[jpiv[i]] = tmp[jpiv[i]], tmp[i] + } + } + x[0] = tmp[0] + x[1] = tmp[1] + x[ldx] = tmp[2] + x[ldx+1] = tmp[3] + xnorm = math.Max(math.Abs(tmp[0])+math.Abs(tmp[1]), math.Abs(tmp[2])+math.Abs(tmp[3])) + return scale, xnorm, ok +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlatbs.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlatbs.go new file mode 100644 index 0000000000..e0e809cf90 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlatbs.go @@ -0,0 +1,454 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
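The 1×1 branch of Dlasy2 above makes its calling convention concrete. A usage sketch (illustrative only; it assumes the vendored import path added by this patch):

	package main

	import (
		"fmt"

		"gonum.org/v1/gonum/lapack/gonum"
	)

	func main() {
		var impl gonum.Implementation
		// Solve TL*X + sgn*X*TR = scale*B in the 1×1 case:
		// 2*x + 1*x*3 = scale*10, so x = 2 with scale = 1.
		tl := []float64{2}
		tr := []float64{3}
		b := []float64{10}
		x := make([]float64, 1)
		scale, xnorm, ok := impl.Dlasy2(false, false, 1, 1, 1, tl, 1, tr, 1, b, 1, x, 1)
		fmt.Println(x[0], scale, xnorm, ok) // 2 1 2 true
	}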
+ +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlatbs solves a triangular banded system of equations +// +// A * x = s*b if trans == blas.NoTrans +// Aᵀ * x = s*b if trans == blas.Trans or blas.ConjTrans +// +// where A is an upper or lower triangular band matrix, x and b are n-element +// vectors, and s is a scaling factor chosen so that the components of x will be +// less than the overflow threshold. +// +// On entry, x contains the right-hand side b of the triangular system. +// On return, x is overwritten by the solution vector x. +// +// normin specifies whether the cnorm parameter contains the column norms of A on +// entry. If it is true, cnorm[j] contains the norm of the off-diagonal part of +// the j-th column of A. If it is false, the norms will be computed and stored +// in cnorm. +// +// Dlatbs returns the scaling factor s for the triangular system. If the matrix +// A is singular (A[j,j]==0 for some j), then scale is set to 0 and a +// non-trivial solution to A*x = 0 is returned. +// +// Dlatbs is an internal routine. It is exported for testing purposes. +func (Implementation) Dlatbs(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, normin bool, n, kd int, ab []float64, ldab int, x, cnorm []float64) (scale float64) { + noTran := trans == blas.NoTrans + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case !noTran && trans != blas.Trans && trans != blas.ConjTrans: + panic(badTrans) + case diag != blas.NonUnit && diag != blas.Unit: + panic(badDiag) + case n < 0: + panic(nLT0) + case kd < 0: + panic(kdLT0) + case ldab < kd+1: + panic(badLdA) + } + + // Quick return if possible. + if n == 0 { + return 1 + } + + switch { + case len(ab) < (n-1)*ldab+kd+1: + panic(shortAB) + case len(x) < n: + panic(shortX) + case len(cnorm) < n: + panic(shortCNorm) + } + + // Parameters to control overflow. + smlnum := dlamchS / dlamchP + bignum := 1 / smlnum + + bi := blas64.Implementation() + kld := max(1, ldab-1) + if !normin { + // Compute the 1-norm of each column, not including the diagonal. + if uplo == blas.Upper { + for j := 0; j < n; j++ { + jlen := min(j, kd) + if jlen > 0 { + cnorm[j] = bi.Dasum(jlen, ab[(j-jlen)*ldab+jlen:], kld) + } else { + cnorm[j] = 0 + } + } + } else { + for j := 0; j < n; j++ { + jlen := min(n-j-1, kd) + if jlen > 0 { + cnorm[j] = bi.Dasum(jlen, ab[(j+1)*ldab+kd-1:], kld) + } else { + cnorm[j] = 0 + } + } + } + } + + // Set up indices and increments for loops below. + var ( + jFirst, jLast, jInc int + maind int + ) + if noTran { + if uplo == blas.Upper { + jFirst = n - 1 + jLast = -1 + jInc = -1 + maind = 0 + } else { + jFirst = 0 + jLast = n + jInc = 1 + maind = kd + } + } else { + if uplo == blas.Upper { + jFirst = 0 + jLast = n + jInc = 1 + maind = 0 + } else { + jFirst = n - 1 + jLast = -1 + jInc = -1 + maind = kd + } + } + + // Scale the column norms by tscal if the maximum element in cnorm is + // greater than bignum. + tmax := cnorm[bi.Idamax(n, cnorm, 1)] + tscal := 1.0 + if tmax > bignum { + tscal = 1 / (smlnum * tmax) + bi.Dscal(n, tscal, cnorm, 1) + } + + // Compute a bound on the computed solution vector to see if the Level 2 + // BLAS routine Dtbsv can be used. + + xMax := math.Abs(x[bi.Idamax(n, x, 1)]) + xBnd := xMax + grow := 0.0 + // Compute the growth only if the maximum element in cnorm is NOT greater + // than bignum. + if tscal != 1 { + goto skipComputeGrow + } + if noTran { + // Compute the growth in A * x = b. 
+ if diag == blas.NonUnit { + // A is non-unit triangular. + // + // Compute grow = 1/G_j and xBnd = 1/M_j. + // Initially, G_0 = max{x(i), i=1,...,n}. + grow = 1 / math.Max(xBnd, smlnum) + xBnd = grow + for j := jFirst; j != jLast; j += jInc { + if grow <= smlnum { + // Exit the loop because the growth factor is too small. + goto skipComputeGrow + } + // M_j = G_{j-1} / abs(A[j,j]) + tjj := math.Abs(ab[j*ldab+maind]) + xBnd = math.Min(xBnd, math.Min(1, tjj)*grow) + if tjj+cnorm[j] >= smlnum { + // G_j = G_{j-1}*( 1 + cnorm[j] / abs(A[j,j]) ) + grow *= tjj / (tjj + cnorm[j]) + } else { + // G_j could overflow, set grow to 0. + grow = 0 + } + } + grow = xBnd + } else { + // A is unit triangular. + // + // Compute grow = 1/G_j, where G_0 = max{x(i), i=1,...,n}. + grow = math.Min(1, 1/math.Max(xBnd, smlnum)) + for j := jFirst; j != jLast; j += jInc { + if grow <= smlnum { + // Exit the loop because the growth factor is too small. + goto skipComputeGrow + } + // G_j = G_{j-1}*( 1 + cnorm[j] ) + grow /= 1 + cnorm[j] + } + } + } else { + // Compute the growth in Aᵀ * x = b. + if diag == blas.NonUnit { + // A is non-unit triangular. + // + // Compute grow = 1/G_j and xBnd = 1/M_j. + // Initially, G_0 = max{x(i), i=1,...,n}. + grow = 1 / math.Max(xBnd, smlnum) + xBnd = grow + for j := jFirst; j != jLast; j += jInc { + if grow <= smlnum { + // Exit the loop because the growth factor is too small. + goto skipComputeGrow + } + // G_j = max( G_{j-1}, M_{j-1}*( 1 + cnorm[j] ) ) + xj := 1 + cnorm[j] + grow = math.Min(grow, xBnd/xj) + // M_j = M_{j-1}*( 1 + cnorm[j] ) / abs(A[j,j]) + tjj := math.Abs(ab[j*ldab+maind]) + if xj > tjj { + xBnd *= tjj / xj + } + } + grow = math.Min(grow, xBnd) + } else { + // A is unit triangular. + // + // Compute grow = 1/G_j, where G_0 = max{x(i), i=1,...,n}. + grow = math.Min(1, 1/math.Max(xBnd, smlnum)) + for j := jFirst; j != jLast; j += jInc { + if grow <= smlnum { + // Exit the loop because the growth factor is too small. + goto skipComputeGrow + } + // G_j = G_{j-1}*( 1 + cnorm[j] ) + grow /= 1 + cnorm[j] + } + } + } +skipComputeGrow: + + if grow*tscal > smlnum { + // The reciprocal of the bound on elements of X is not too small, use + // the Level 2 BLAS solve. + bi.Dtbsv(uplo, trans, diag, n, kd, ab, ldab, x, 1) + // Scale the column norms by 1/tscal for return. + if tscal != 1 { + bi.Dscal(n, 1/tscal, cnorm, 1) + } + return 1 + } + + // Use a Level 1 BLAS solve, scaling intermediate results. + + scale = 1 + if xMax > bignum { + // Scale x so that its components are less than or equal to bignum in + // absolute value. + scale = bignum / xMax + bi.Dscal(n, scale, x, 1) + xMax = bignum + } + + if noTran { + // Solve A * x = b. + for j := jFirst; j != jLast; j += jInc { + // Compute x[j] = b[j] / A[j,j], scaling x if necessary. + xj := math.Abs(x[j]) + tjjs := tscal + if diag == blas.NonUnit { + tjjs *= ab[j*ldab+maind] + } + tjj := math.Abs(tjjs) + switch { + case tjj > smlnum: + // smlnum < abs(A[j,j]) + if tjj < 1 && xj > tjj*bignum { + // Scale x by 1/b[j]. + rec := 1 / xj + bi.Dscal(n, rec, x, 1) + scale *= rec + xMax *= rec + } + x[j] /= tjjs + xj = math.Abs(x[j]) + case tjj > 0: + // 0 < abs(A[j,j]) <= smlnum + if xj > tjj*bignum { + // Scale x by (1/abs(x[j]))*abs(A[j,j])*bignum to avoid + // overflow when dividing by A[j,j]. + rec := tjj * bignum / xj + if cnorm[j] > 1 { + // Scale by 1/cnorm[j] to avoid overflow when + // multiplying x[j] times column j. 
+ rec /= cnorm[j] + } + bi.Dscal(n, rec, x, 1) + scale *= rec + xMax *= rec + } + x[j] /= tjjs + xj = math.Abs(x[j]) + default: + // A[j,j] == 0: Set x[0:n] = 0, x[j] = 1, and scale = 0, and + // compute a solution to A*x = 0. + for i := range x[:n] { + x[i] = 0 + } + x[j] = 1 + xj = 1 + scale = 0 + xMax = 0 + } + + // Scale x if necessary to avoid overflow when adding a multiple of + // column j of A. + switch { + case xj > 1: + rec := 1 / xj + if cnorm[j] > (bignum-xMax)*rec { + // Scale x by 1/(2*abs(x[j])). + rec *= 0.5 + bi.Dscal(n, rec, x, 1) + scale *= rec + } + case xj*cnorm[j] > bignum-xMax: + // Scale x by 1/2. + bi.Dscal(n, 0.5, x, 1) + scale *= 0.5 + } + + if uplo == blas.Upper { + if j > 0 { + // Compute the update + // x[max(0,j-kd):j] := x[max(0,j-kd):j] - x[j] * A[max(0,j-kd):j,j] + jlen := min(j, kd) + if jlen > 0 { + bi.Daxpy(jlen, -x[j]*tscal, ab[(j-jlen)*ldab+jlen:], kld, x[j-jlen:], 1) + } + i := bi.Idamax(j, x, 1) + xMax = math.Abs(x[i]) + } + } else if j < n-1 { + // Compute the update + // x[j+1:min(j+kd,n)] := x[j+1:min(j+kd,n)] - x[j] * A[j+1:min(j+kd,n),j] + jlen := min(kd, n-j-1) + if jlen > 0 { + bi.Daxpy(jlen, -x[j]*tscal, ab[(j+1)*ldab+kd-1:], kld, x[j+1:], 1) + } + i := j + 1 + bi.Idamax(n-j-1, x[j+1:], 1) + xMax = math.Abs(x[i]) + } + } + } else { + // Solve Aᵀ * x = b. + for j := jFirst; j != jLast; j += jInc { + // Compute x[j] = b[j] - sum A[k,j]*x[k]. + // k!=j + xj := math.Abs(x[j]) + tjjs := tscal + if diag == blas.NonUnit { + tjjs *= ab[j*ldab+maind] + } + tjj := math.Abs(tjjs) + rec := 1 / math.Max(1, xMax) + uscal := tscal + if cnorm[j] > (bignum-xj)*rec { + // If x[j] could overflow, scale x by 1/(2*xMax). + rec *= 0.5 + if tjj > 1 { + // Divide by A[j,j] when scaling x if A[j,j] > 1. + rec = math.Min(1, rec*tjj) + uscal /= tjjs + } + if rec < 1 { + bi.Dscal(n, rec, x, 1) + scale *= rec + xMax *= rec + } + } + + var sumj float64 + if uscal == 1 { + // If the scaling needed for A in the dot product is 1, call + // Ddot to perform the dot product... + if uplo == blas.Upper { + jlen := min(j, kd) + if jlen > 0 { + sumj = bi.Ddot(jlen, ab[(j-jlen)*ldab+jlen:], kld, x[j-jlen:], 1) + } + } else { + jlen := min(n-j-1, kd) + if jlen > 0 { + sumj = bi.Ddot(jlen, ab[(j+1)*ldab+kd-1:], kld, x[j+1:], 1) + } + } + } else { + // ...otherwise, use in-line code for the dot product. + if uplo == blas.Upper { + jlen := min(j, kd) + for i := 0; i < jlen; i++ { + sumj += (ab[(j-jlen+i)*ldab+jlen-i] * uscal) * x[j-jlen+i] + } + } else { + jlen := min(n-j-1, kd) + for i := 0; i < jlen; i++ { + sumj += (ab[(j+1+i)*ldab+kd-1-i] * uscal) * x[j+i+1] + } + } + } + + if uscal == tscal { + // Compute x[j] := ( x[j] - sumj ) / A[j,j] + // if 1/A[j,j] was not used to scale the dot product. + x[j] -= sumj + xj = math.Abs(x[j]) + // Compute x[j] = x[j] / A[j,j], scaling if necessary. + // Note: the reference implementation skips this step for blas.Unit matrices + // when tscal is equal to 1 but it complicates the logic and only saves + // the comparison and division in the first switch-case. Not skipping it + // is also consistent with the NoTrans case above. + switch { + case tjj > smlnum: + // smlnum < abs(A[j,j]): + if tjj < 1 && xj > tjj*bignum { + // Scale x by 1/abs(x[j]). + rec := 1 / xj + bi.Dscal(n, rec, x, 1) + scale *= rec + xMax *= rec + } + x[j] /= tjjs + case tjj > 0: + // 0 < abs(A[j,j]) <= smlnum: + if xj > tjj*bignum { + // Scale x by (1/abs(x[j]))*abs(A[j,j])*bignum. 
+ rec := (tjj * bignum) / xj + bi.Dscal(n, rec, x, 1) + scale *= rec + xMax *= rec + } + x[j] /= tjjs + default: + // A[j,j] == 0: Set x[0:n] = 0, x[j] = 1, and scale = 0, and + // compute a solution Aᵀ * x = 0. + for i := range x[:n] { + x[i] = 0 + } + x[j] = 1 + scale = 0 + xMax = 0 + } + } else { + // Compute x[j] := x[j] / A[j,j] - sumj + // if the dot product has already been divided by 1/A[j,j]. + x[j] = x[j]/tjjs - sumj + } + xMax = math.Max(xMax, math.Abs(x[j])) + } + scale /= tscal + } + + // Scale the column norms by 1/tscal for return. + if tscal != 1 { + bi.Dscal(n, 1/tscal, cnorm, 1) + } + return scale +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlatdf.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlatdf.go new file mode 100644 index 0000000000..83422912b9 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlatdf.go @@ -0,0 +1,175 @@ +// Copyright ©2021 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dlatdf computes a contribution to the reciprocal Dif-estimate by solving +// +// Z * x = h - f +// +// and choosing the vector h such that the norm of x is as large as possible. +// +// The n×n matrix Z is represented by its LU factorization as computed by Dgetc2 +// and has the form +// +// Z = P * L * U * Q +// +// where P and Q are permutation matrices, L is lower triangular with unit +// diagonal elements and U is upper triangular. +// +// job specifies the heuristic method for computing the contribution. +// +// If job is lapack.LocalLookAhead, all entries of h are chosen as either +1 or +// -1. +// +// If job is lapack.NormalizedNullVector, an approximate null-vector e of Z is +// computed using Dgecon and normalized. h is chosen as ±e with the sign giving +// the greater value of 2-norm(x). This strategy is about 5 times as expensive +// as LocalLookAhead. +// +// On entry, rhs holds the contribution f from earlier solved sub-systems. On +// return, rhs holds the solution x. +// +// ipiv and jpiv contain the pivot indices as returned by Dgetc2: row i of the +// matrix has been interchanged with row ipiv[i] and column j of the matrix has +// been interchanged with column jpiv[j]. +// +// n must be at most 8, ipiv and jpiv must have length n, and rhs must have +// length at least n, otherwise Dlatdf will panic. +// +// rdsum and rdscal represent the sum of squares of computed contributions to +// the Dif-estimate from earlier solved sub-systems. rdscal is the scaling +// factor used to prevent overflow in rdsum. Dlatdf returns this sum of squares +// updated with the contributions from the current sub-system. +// +// Dlatdf is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlatdf(job lapack.MaximizeNormXJob, n int, z []float64, ldz int, rhs []float64, rdsum, rdscal float64, ipiv, jpiv []int) (scale, sum float64) { + switch { + case job != lapack.LocalLookAhead && job != lapack.NormalizedNullVector: + panic(badMaximizeNormXJob) + case n < 0: + panic(nLT0) + case n > 8: + panic("lapack: n > 8") + case ldz < max(1, n): + panic(badLdZ) + } + + // Quick return if possible. 
+	if n == 0 {
+		return
+	}
+
+	switch {
+	case len(z) < (n-1)*ldz+n:
+		panic(shortZ)
+	case len(rhs) < n:
+		panic(shortRHS)
+	case len(ipiv) != n:
+		panic(badLenIpiv)
+	case len(jpiv) != n:
+		panic(badLenJpiv)
+	}
+
+	const maxdim = 8
+	var (
+		xps   [maxdim]float64
+		xms   [maxdim]float64
+		work  [4 * maxdim]float64
+		iwork [maxdim]int
+	)
+	bi := blas64.Implementation()
+	xp := xps[:n]
+	xm := xms[:n]
+	if job == lapack.NormalizedNullVector {
+		// Compute approximate nullvector xm of Z.
+		_ = impl.Dgecon(lapack.MaxRowSum, n, z, ldz, 1, work[:], iwork[:])
+		// This relies on undocumented content in work[n:2*n] stored by Dgecon.
+		bi.Dcopy(n, work[n:], 1, xm, 1)
+
+		// Compute rhs.
+		impl.Dlaswp(1, xm, 1, 0, n-2, ipiv[:n-1], -1)
+		tmp := 1 / bi.Dnrm2(n, xm, 1)
+		bi.Dscal(n, tmp, xm, 1)
+		bi.Dcopy(n, xm, 1, xp, 1)
+		bi.Daxpy(n, 1, rhs, 1, xp, 1)
+		bi.Daxpy(n, -1.0, xm, 1, rhs, 1)
+		_ = impl.Dgesc2(n, z, ldz, rhs, ipiv, jpiv)
+		_ = impl.Dgesc2(n, z, ldz, xp, ipiv, jpiv)
+		if bi.Dasum(n, xp, 1) > bi.Dasum(n, rhs, 1) {
+			bi.Dcopy(n, xp, 1, rhs, 1)
+		}
+
+		// Compute and return the updated sum of squares.
+		return impl.Dlassq(n, rhs, 1, rdscal, rdsum)
+	}
+
+	// Apply permutations ipiv to rhs.
+	impl.Dlaswp(1, rhs, 1, 0, n-2, ipiv[:n-1], 1)
+
+	// Solve for L-part choosing rhs either to +1 or -1.
+	pmone := -1.0
+	for j := 0; j < n-2; j++ {
+		bp := rhs[j] + 1
+		bm := rhs[j] - 1
+
+		// Look-ahead for L-part rhs[0:n-2] = +1 or -1, splus and sminu computed
+		// more efficiently than in https://doi.org/10.1109/9.29404.
+		splus := 1 + bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, z[(j+1)*ldz+j:], ldz)
+		sminu := bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1)
+		splus *= rhs[j]
+		switch {
+		case splus > sminu:
+			rhs[j] = bp
+		case sminu > splus:
+			rhs[j] = bm
+		default:
+			// In this case the updating sums are equal and we can choose rhs[j]
+			// +1 or -1. The first time this happens we choose -1, thereafter
+			// +1. This is a simple way to get good estimates of matrices like
+			// Byers well-known example (see https://doi.org/10.1109/9.29404).
+			rhs[j] += pmone
+			pmone = 1
+		}
+
+		// Compute remaining rhs.
+		bi.Daxpy(n-j-1, -rhs[j], z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1)
+	}
+
+	// Solve for U-part, look-ahead for rhs[n-1] = ±1. This is not done in
+	// Bsolve and will hopefully give us a better estimate because any
+	// ill-conditioning of the original matrix is transferred to U and not to L.
+	// U[n-1,n-1] is an approximation to sigma_min(LU).
+	bi.Dcopy(n-1, rhs, 1, xp, 1)
+	xp[n-1] = rhs[n-1] + 1
+	rhs[n-1] -= 1
+	var splus, sminu float64
+	for i := n - 1; i >= 0; i-- {
+		tmp := 1 / z[i*ldz+i]
+		xp[i] *= tmp
+		rhs[i] *= tmp
+		for k := i + 1; k < n; k++ {
+			xp[i] -= xp[k] * (z[i*ldz+k] * tmp)
+			rhs[i] -= rhs[k] * (z[i*ldz+k] * tmp)
+		}
+		splus += math.Abs(xp[i])
+		sminu += math.Abs(rhs[i])
+	}
+	if splus > sminu {
+		bi.Dcopy(n, xp, 1, rhs, 1)
+	}
+
+	// Apply the permutations jpiv to the computed solution (rhs).
+	impl.Dlaswp(1, rhs, 1, 0, n-2, jpiv[:n-1], -1)
+
+	// Compute and return the updated sum of squares.
+	return impl.Dlassq(n, rhs, 1, rdscal, rdsum)
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlatrd.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlatrd.go
new file mode 100644
index 0000000000..195be09c9b
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlatrd.go
@@ -0,0 +1,176 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
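Dlatdf above hands its result back through Dlassq's scaled representation, in which a sum of squares is carried as scl²·smsq to avoid overflow and underflow. A small sketch of that invariant (assuming the vendored package as above):

	package main

	import (
		"fmt"
		"math"

		"gonum.org/v1/gonum/lapack/gonum"
	)

	func main() {
		var impl gonum.Implementation
		// Dlassq returns scl and smsq with scl^2*smsq = 3^2 + 4^2,
		// starting from the neutral representation scale=1, sumsq=0.
		x := []float64{3, 4}
		scl, smsq := impl.Dlassq(2, x, 1, 1, 0)
		fmt.Println(scl * math.Sqrt(smsq)) // 5, the Euclidean norm of x
	}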
+ +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlatrd reduces nb rows and columns of a real n×n symmetric matrix A to symmetric +// tridiagonal form. It computes the orthonormal similarity transformation +// +// Qᵀ * A * Q +// +// and returns the matrices V and W to apply to the unreduced part of A. If +// uplo == blas.Upper, the upper triangle is supplied and the last nb rows are +// reduced. If uplo == blas.Lower, the lower triangle is supplied and the first +// nb rows are reduced. +// +// a contains the symmetric matrix on entry with active triangular half specified +// by uplo. On exit, the nb columns have been reduced to tridiagonal form. The +// diagonal contains the diagonal of the reduced matrix, the off-diagonal is +// set to 1, and the remaining elements contain the data to construct Q. +// +// If uplo == blas.Upper, with n = 5 and nb = 2 on exit a is +// +// [ a a a v4 v5] +// [ a a v4 v5] +// [ a 1 v5] +// [ d 1] +// [ d] +// +// If uplo == blas.Lower, with n = 5 and nb = 2, on exit a is +// +// [ d ] +// [ 1 d ] +// [v1 1 a ] +// [v1 v2 a a ] +// [v1 v2 a a a] +// +// e contains the superdiagonal elements of the reduced matrix. If uplo == blas.Upper, +// e[n-nb:n-1] contains the last nb columns of the reduced matrix, while if +// uplo == blas.Lower, e[:nb] contains the first nb columns of the reduced matrix. +// e must have length at least n-1, and Dlatrd will panic otherwise. +// +// tau contains the scalar factors of the elementary reflectors needed to construct Q. +// The reflectors are stored in tau[n-nb:n-1] if uplo == blas.Upper, and in +// tau[:nb] if uplo == blas.Lower. tau must have length n-1, and Dlatrd will panic +// otherwise. +// +// w is an n×nb matrix. On exit it contains the data to update the unreduced part +// of A. +// +// The matrix Q is represented as a product of elementary reflectors. Each reflector +// H has the form +// +// I - tau * v * vᵀ +// +// If uplo == blas.Upper, +// +// Q = H_{n-1} * H_{n-2} * ... * H_{n-nb} +// +// where v[:i-1] is stored in A[:i-1,i], v[i-1] = 1, and v[i:n] = 0. +// +// If uplo == blas.Lower, +// +// Q = H_0 * H_1 * ... * H_{nb-1} +// +// where v[:i+1] = 0, v[i+1] = 1, and v[i+2:n] is stored in A[i+2:n,i]. +// +// The vectors v form the n×nb matrix V which is used with W to apply a +// symmetric rank-2 update to the unreduced part of A +// +// A = A - V * Wᵀ - W * Vᵀ +// +// Dlatrd is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlatrd(uplo blas.Uplo, n, nb int, a []float64, lda int, e, tau, w []float64, ldw int) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case nb < 0: + panic(nbLT0) + case nb > n: + panic(nbGTN) + case lda < max(1, n): + panic(badLdA) + case ldw < max(1, nb): + panic(badLdW) + } + + if n == 0 { + return + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(w) < (n-1)*ldw+nb: + panic(shortW) + case len(e) < n-1: + panic(shortE) + case len(tau) < n-1: + panic(shortTau) + } + + bi := blas64.Implementation() + + if uplo == blas.Upper { + for i := n - 1; i >= n-nb; i-- { + iw := i - n + nb + if i < n-1 { + // Update A(0:i, i). + bi.Dgemv(blas.NoTrans, i+1, n-i-1, -1, a[i+1:], lda, + w[i*ldw+iw+1:], 1, 1, a[i:], lda) + bi.Dgemv(blas.NoTrans, i+1, n-i-1, -1, w[iw+1:], ldw, + a[i*lda+i+1:], 1, 1, a[i:], lda) + } + if i > 0 { + // Generate elementary reflector H_i to annihilate A(0:i-2,i). 
+ e[i-1], tau[i-1] = impl.Dlarfg(i, a[(i-1)*lda+i], a[i:], lda) + a[(i-1)*lda+i] = 1 + + // Compute W(0:i-1, i). + bi.Dsymv(blas.Upper, i, 1, a, lda, a[i:], lda, 0, w[iw:], ldw) + if i < n-1 { + bi.Dgemv(blas.Trans, i, n-i-1, 1, w[iw+1:], ldw, + a[i:], lda, 0, w[(i+1)*ldw+iw:], ldw) + bi.Dgemv(blas.NoTrans, i, n-i-1, -1, a[i+1:], lda, + w[(i+1)*ldw+iw:], ldw, 1, w[iw:], ldw) + bi.Dgemv(blas.Trans, i, n-i-1, 1, a[i+1:], lda, + a[i:], lda, 0, w[(i+1)*ldw+iw:], ldw) + bi.Dgemv(blas.NoTrans, i, n-i-1, -1, w[iw+1:], ldw, + w[(i+1)*ldw+iw:], ldw, 1, w[iw:], ldw) + } + bi.Dscal(i, tau[i-1], w[iw:], ldw) + alpha := -0.5 * tau[i-1] * bi.Ddot(i, w[iw:], ldw, a[i:], lda) + bi.Daxpy(i, alpha, a[i:], lda, w[iw:], ldw) + } + } + } else { + // Reduce first nb columns of lower triangle. + for i := 0; i < nb; i++ { + // Update A(i:n, i) + bi.Dgemv(blas.NoTrans, n-i, i, -1, a[i*lda:], lda, + w[i*ldw:], 1, 1, a[i*lda+i:], lda) + bi.Dgemv(blas.NoTrans, n-i, i, -1, w[i*ldw:], ldw, + a[i*lda:], 1, 1, a[i*lda+i:], lda) + if i < n-1 { + // Generate elementary reflector H_i to annihilate A(i+2:n,i). + e[i], tau[i] = impl.Dlarfg(n-i-1, a[(i+1)*lda+i], a[min(i+2, n-1)*lda+i:], lda) + a[(i+1)*lda+i] = 1 + + // Compute W(i+1:n,i). + bi.Dsymv(blas.Lower, n-i-1, 1, a[(i+1)*lda+i+1:], lda, + a[(i+1)*lda+i:], lda, 0, w[(i+1)*ldw+i:], ldw) + bi.Dgemv(blas.Trans, n-i-1, i, 1, w[(i+1)*ldw:], ldw, + a[(i+1)*lda+i:], lda, 0, w[i:], ldw) + bi.Dgemv(blas.NoTrans, n-i-1, i, -1, a[(i+1)*lda:], lda, + w[i:], ldw, 1, w[(i+1)*ldw+i:], ldw) + bi.Dgemv(blas.Trans, n-i-1, i, 1, a[(i+1)*lda:], lda, + a[(i+1)*lda+i:], lda, 0, w[i:], ldw) + bi.Dgemv(blas.NoTrans, n-i-1, i, -1, w[(i+1)*ldw:], ldw, + w[i:], ldw, 1, w[(i+1)*ldw+i:], ldw) + bi.Dscal(n-i-1, tau[i], w[(i+1)*ldw+i:], ldw) + alpha := -0.5 * tau[i] * bi.Ddot(n-i-1, w[(i+1)*ldw+i:], ldw, + a[(i+1)*lda+i:], lda) + bi.Daxpy(n-i-1, alpha, a[(i+1)*lda+i:], lda, + w[(i+1)*ldw+i:], ldw) + } + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlatrs.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlatrs.go new file mode 100644 index 0000000000..f13b7d57c0 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlatrs.go @@ -0,0 +1,410 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dlatrs solves a triangular system of equations scaled to prevent overflow. It +// solves +// +// A * x = scale * b if trans == blas.NoTrans +// Aᵀ * x = scale * b if trans == blas.Trans +// +// where the scale s is set for numeric stability. +// +// A is an n×n triangular matrix. On entry, the slice x contains the values of +// b, and on exit it contains the solution vector x. +// +// If normin == true, cnorm is an input and cnorm[j] contains the norm of the off-diagonal +// part of the j^th column of A. If trans == blas.NoTrans, cnorm[j] must be greater +// than or equal to the infinity norm, and greater than or equal to the one-norm +// otherwise. If normin == false, then cnorm is treated as an output, and is set +// to contain the 1-norm of the off-diagonal part of the j^th column of A. +// +// Dlatrs is an internal routine. It is exported for testing purposes. 
+func (impl Implementation) Dlatrs(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, normin bool, n int, a []float64, lda int, x []float64, cnorm []float64) (scale float64) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: + panic(badTrans) + case diag != blas.Unit && diag != blas.NonUnit: + panic(badDiag) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. + if n == 0 { + return 1 + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(x) < n: + panic(shortX) + case len(cnorm) < n: + panic(shortCNorm) + } + + upper := uplo == blas.Upper + nonUnit := diag == blas.NonUnit + + smlnum := dlamchS / dlamchP + bignum := 1 / smlnum + scale = 1 + + bi := blas64.Implementation() + + if !normin { + if upper { + cnorm[0] = 0 + for j := 1; j < n; j++ { + cnorm[j] = bi.Dasum(j, a[j:], lda) + } + } else { + for j := 0; j < n-1; j++ { + cnorm[j] = bi.Dasum(n-j-1, a[(j+1)*lda+j:], lda) + } + cnorm[n-1] = 0 + } + } + // Scale the column norms by tscal if the maximum element in cnorm is greater than bignum. + imax := bi.Idamax(n, cnorm, 1) + var tscal float64 + if cnorm[imax] <= bignum { + tscal = 1 + } else { + tmax := cnorm[imax] + // Avoid NaN generation if entries in cnorm exceed the overflow + // threshold. + if tmax <= math.MaxFloat64 { + // Case 1: All entries in cnorm are valid floating-point numbers. + tscal = 1 / (smlnum * tmax) + bi.Dscal(n, tscal, cnorm, 1) + } else { + // Case 2: At least one column norm of A cannot be represented as + // floating-point number. Find the offdiagonal entry A[i,j] with the + // largest absolute value. If this entry is not +/- Infinity, use + // this value as tscal. + tmax = 0 + if upper { + // A is upper triangular. + for j := 1; j < n; j++ { + tmax = math.Max(impl.Dlange(lapack.MaxAbs, j, 1, a[j:], lda, nil), tmax) + } + } else { + // A is lower triangular. + for j := 0; j < n-1; j++ { + tmax = math.Max(impl.Dlange(lapack.MaxAbs, n-j-1, 1, a[(j+1)*lda+j:], lda, nil), tmax) + } + } + if tmax <= math.MaxFloat64 { + tscal = 1 / (smlnum * tmax) + for j := 0; j < n; j++ { + if cnorm[j] <= math.MaxFloat64 { + cnorm[j] *= tscal + } else { + // Recompute the 1-norm without introducing Infinity in + // the summation. + cnorm[j] = 0 + if upper { + for i := 0; i < j; i++ { + cnorm[j] += tscal * math.Abs(a[i*lda+j]) + } + } else { + for i := j + 1; i < n; i++ { + cnorm[j] += tscal * math.Abs(a[i*lda+j]) + } + } + } + } + } else { + // At least one entry of A is not a valid floating-point entry. + // Rely on Dtrsv to propagate Inf and NaN. + bi.Dtrsv(uplo, trans, diag, n, a, lda, x, 1) + return + } + } + } + + // Compute a bound on the computed solution vector to see if bi.Dtrsv can be used. + j := bi.Idamax(n, x, 1) + xmax := math.Abs(x[j]) + xbnd := xmax + var grow float64 + var jfirst, jlast, jinc int + if trans == blas.NoTrans { + if upper { + jfirst = n - 1 + jlast = -1 + jinc = -1 + } else { + jfirst = 0 + jlast = n + jinc = 1 + } + // Compute the growth in A * x = b. 
+ if tscal != 1 { + grow = 0 + goto Solve + } + if nonUnit { + grow = 1 / math.Max(xbnd, smlnum) + xbnd = grow + for j := jfirst; j != jlast; j += jinc { + if grow <= smlnum { + goto Solve + } + tjj := math.Abs(a[j*lda+j]) + xbnd = math.Min(xbnd, math.Min(1, tjj)*grow) + if tjj+cnorm[j] >= smlnum { + grow *= tjj / (tjj + cnorm[j]) + } else { + grow = 0 + } + } + grow = xbnd + } else { + grow = math.Min(1, 1/math.Max(xbnd, smlnum)) + for j := jfirst; j != jlast; j += jinc { + if grow <= smlnum { + goto Solve + } + grow *= 1 / (1 + cnorm[j]) + } + } + } else { + if upper { + jfirst = 0 + jlast = n + jinc = 1 + } else { + jfirst = n - 1 + jlast = -1 + jinc = -1 + } + if tscal != 1 { + grow = 0 + goto Solve + } + if nonUnit { + grow = 1 / (math.Max(xbnd, smlnum)) + xbnd = grow + for j := jfirst; j != jlast; j += jinc { + if grow <= smlnum { + goto Solve + } + xj := 1 + cnorm[j] + grow = math.Min(grow, xbnd/xj) + tjj := math.Abs(a[j*lda+j]) + if xj > tjj { + xbnd *= tjj / xj + } + } + grow = math.Min(grow, xbnd) + } else { + grow = math.Min(1, 1/math.Max(xbnd, smlnum)) + for j := jfirst; j != jlast; j += jinc { + if grow <= smlnum { + goto Solve + } + xj := 1 + cnorm[j] + grow /= xj + } + } + } + +Solve: + if grow*tscal > smlnum { + // Use the Level 2 BLAS solve if the reciprocal of the bound on + // elements of X is not too small. + bi.Dtrsv(uplo, trans, diag, n, a, lda, x, 1) + if tscal != 1 { + bi.Dscal(n, 1/tscal, cnorm, 1) + } + return scale + } + + // Use a Level 1 BLAS solve, scaling intermediate results. + if xmax > bignum { + scale = bignum / xmax + bi.Dscal(n, scale, x, 1) + xmax = bignum + } + if trans == blas.NoTrans { + for j := jfirst; j != jlast; j += jinc { + xj := math.Abs(x[j]) + var tjj, tjjs float64 + if nonUnit { + tjjs = a[j*lda+j] * tscal + } else { + tjjs = tscal + if tscal == 1 { + goto Skip1 + } + } + tjj = math.Abs(tjjs) + if tjj > smlnum { + if tjj < 1 { + if xj > tjj*bignum { + rec := 1 / xj + bi.Dscal(n, rec, x, 1) + scale *= rec + xmax *= rec + } + } + x[j] /= tjjs + xj = math.Abs(x[j]) + } else if tjj > 0 { + if xj > tjj*bignum { + rec := (tjj * bignum) / xj + if cnorm[j] > 1 { + rec /= cnorm[j] + } + bi.Dscal(n, rec, x, 1) + scale *= rec + xmax *= rec + } + x[j] /= tjjs + xj = math.Abs(x[j]) + } else { + for i := 0; i < n; i++ { + x[i] = 0 + } + x[j] = 1 + xj = 1 + scale = 0 + xmax = 0 + } + Skip1: + if xj > 1 { + rec := 1 / xj + if cnorm[j] > (bignum-xmax)*rec { + rec *= 0.5 + bi.Dscal(n, rec, x, 1) + scale *= rec + } + } else if xj*cnorm[j] > bignum-xmax { + bi.Dscal(n, 0.5, x, 1) + scale *= 0.5 + } + if upper { + if j > 0 { + bi.Daxpy(j, -x[j]*tscal, a[j:], lda, x, 1) + i := bi.Idamax(j, x, 1) + xmax = math.Abs(x[i]) + } + } else { + if j < n-1 { + bi.Daxpy(n-j-1, -x[j]*tscal, a[(j+1)*lda+j:], lda, x[j+1:], 1) + i := j + bi.Idamax(n-j-1, x[j+1:], 1) + xmax = math.Abs(x[i]) + } + } + } + } else { + for j := jfirst; j != jlast; j += jinc { + xj := math.Abs(x[j]) + uscal := tscal + rec := 1 / math.Max(xmax, 1) + var tjjs float64 + if cnorm[j] > (bignum-xj)*rec { + rec *= 0.5 + if nonUnit { + tjjs = a[j*lda+j] * tscal + } else { + tjjs = tscal + } + tjj := math.Abs(tjjs) + if tjj > 1 { + rec = math.Min(1, rec*tjj) + uscal /= tjjs + } + if rec < 1 { + bi.Dscal(n, rec, x, 1) + scale *= rec + xmax *= rec + } + } + var sumj float64 + if uscal == 1 { + if upper { + sumj = bi.Ddot(j, a[j:], lda, x, 1) + } else if j < n-1 { + sumj = bi.Ddot(n-j-1, a[(j+1)*lda+j:], lda, x[j+1:], 1) + } + } else { + if upper { + for i := 0; i < j; i++ { + sumj += (a[i*lda+j] * uscal) * 
x[i] + } + } else if j < n { + for i := j + 1; i < n; i++ { + sumj += (a[i*lda+j] * uscal) * x[i] + } + } + } + if uscal == tscal { + x[j] -= sumj + xj := math.Abs(x[j]) + var tjjs float64 + if nonUnit { + tjjs = a[j*lda+j] * tscal + } else { + tjjs = tscal + if tscal == 1 { + goto Skip2 + } + } + tjj := math.Abs(tjjs) + if tjj > smlnum { + if tjj < 1 { + if xj > tjj*bignum { + rec = 1 / xj + bi.Dscal(n, rec, x, 1) + scale *= rec + xmax *= rec + } + } + x[j] /= tjjs + } else if tjj > 0 { + if xj > tjj*bignum { + rec = (tjj * bignum) / xj + bi.Dscal(n, rec, x, 1) + scale *= rec + xmax *= rec + } + x[j] /= tjjs + } else { + for i := 0; i < n; i++ { + x[i] = 0 + } + x[j] = 1 + scale = 0 + xmax = 0 + } + } else { + x[j] = x[j]/tjjs - sumj + } + Skip2: + xmax = math.Max(xmax, math.Abs(x[j])) + } + } + scale /= tscal + if tscal != 1 { + bi.Dscal(n, 1/tscal, cnorm, 1) + } + return scale +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlauu2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlauu2.go new file mode 100644 index 0000000000..b70a8420d5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlauu2.go @@ -0,0 +1,66 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlauu2 computes the product +// +// U * Uᵀ if uplo is blas.Upper +// Lᵀ * L if uplo is blas.Lower +// +// where U or L is stored in the upper or lower triangular part of A. +// Only the upper or lower triangle of the result is stored, overwriting +// the corresponding factor in A. +func (impl Implementation) Dlauu2(uplo blas.Uplo, n int, a []float64, lda int) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. + if n == 0 { + return + } + + if len(a) < (n-1)*lda+n { + panic(shortA) + } + + bi := blas64.Implementation() + + if uplo == blas.Upper { + // Compute the product U*Uᵀ. + for i := 0; i < n; i++ { + aii := a[i*lda+i] + if i < n-1 { + a[i*lda+i] = bi.Ddot(n-i, a[i*lda+i:], 1, a[i*lda+i:], 1) + bi.Dgemv(blas.NoTrans, i, n-i-1, 1, a[i+1:], lda, a[i*lda+i+1:], 1, + aii, a[i:], lda) + } else { + bi.Dscal(i+1, aii, a[i:], lda) + } + } + } else { + // Compute the product Lᵀ*L. + for i := 0; i < n; i++ { + aii := a[i*lda+i] + if i < n-1 { + a[i*lda+i] = bi.Ddot(n-i, a[i*lda+i:], lda, a[i*lda+i:], lda) + bi.Dgemv(blas.Trans, n-i-1, i, 1, a[(i+1)*lda:], lda, a[(i+1)*lda+i:], lda, + aii, a[i*lda:], 1) + } else { + bi.Dscal(i+1, aii, a[i*lda:], 1) + } + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dlauum.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dlauum.go new file mode 100644 index 0000000000..575ed7c88f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlauum.go @@ -0,0 +1,83 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dlauum computes the product +// +// U * Uᵀ if uplo is blas.Upper +// Lᵀ * L if uplo is blas.Lower +// +// where U or L is stored in the upper or lower triangular part of A. +// Only the upper or lower triangle of the result is stored, overwriting +// the corresponding factor in A. 
+func (impl Implementation) Dlauum(uplo blas.Uplo, n int, a []float64, lda int) {
+	switch {
+	case uplo != blas.Upper && uplo != blas.Lower:
+		panic(badUplo)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		return
+	}
+
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+
+	// Determine the block size.
+	opts := "U"
+	if uplo == blas.Lower {
+		opts = "L"
+	}
+	nb := impl.Ilaenv(1, "DLAUUM", opts, n, -1, -1, -1)
+
+	if nb <= 1 || n <= nb {
+		// Use unblocked code.
+		impl.Dlauu2(uplo, n, a, lda)
+		return
+	}
+
+	// Use blocked code.
+	bi := blas64.Implementation()
+	if uplo == blas.Upper {
+		// Compute the product U*Uᵀ.
+		for i := 0; i < n; i += nb {
+			ib := min(nb, n-i)
+			bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.NonUnit,
+				i, ib, 1, a[i*lda+i:], lda, a[i:], lda)
+			impl.Dlauu2(blas.Upper, ib, a[i*lda+i:], lda)
+			if n-i-ib > 0 {
+				bi.Dgemm(blas.NoTrans, blas.Trans, i, ib, n-i-ib,
+					1, a[i+ib:], lda, a[i*lda+i+ib:], lda, 1, a[i:], lda)
+				bi.Dsyrk(blas.Upper, blas.NoTrans, ib, n-i-ib,
+					1, a[i*lda+i+ib:], lda, 1, a[i*lda+i:], lda)
+			}
+		}
+	} else {
+		// Compute the product Lᵀ*L.
+		for i := 0; i < n; i += nb {
+			ib := min(nb, n-i)
+			bi.Dtrmm(blas.Left, blas.Lower, blas.Trans, blas.NonUnit,
+				ib, i, 1, a[i*lda+i:], lda, a[i*lda:], lda)
+			impl.Dlauu2(blas.Lower, ib, a[i*lda+i:], lda)
+			if n-i-ib > 0 {
+				bi.Dgemm(blas.Trans, blas.NoTrans, ib, i, n-i-ib,
+					1, a[(i+ib)*lda+i:], lda, a[(i+ib)*lda:], lda, 1, a[i*lda:], lda)
+				bi.Dsyrk(blas.Lower, blas.Trans, ib, n-i-ib,
+					1, a[(i+ib)*lda+i:], lda, 1, a[i*lda+i:], lda)
+			}
+		}
+	}
+}
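For a concrete view of what Dlauum computes, here is a hedged usage sketch (it assumes the vendored import path added by this patch): with U = [1 2; 0 3] stored row-major, the upper triangle of A is overwritten by the upper triangle of U*Uᵀ.

	package main

	import (
		"fmt"

		"gonum.org/v1/gonum/blas"
		"gonum.org/v1/gonum/lapack/gonum"
	)

	func main() {
		var impl gonum.Implementation
		// U = [1 2; 0 3] in row-major order with lda = 2.
		a := []float64{
			1, 2,
			0, 3,
		}
		// U*Uᵀ = [5 6; 6 9]; only the upper triangle is written back.
		impl.Dlauum(blas.Upper, 2, a, 2)
		fmt.Println(a) // [5 6 0 9]
	}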
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/doc.go b/vendor/gonum.org/v1/gonum/lapack/gonum/doc.go
new file mode 100644
index 0000000000..087f63cc6e
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/doc.go
@@ -0,0 +1,28 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package gonum is a pure-go implementation of the LAPACK API. The LAPACK API defines
+// a set of algorithms for advanced matrix operations.
+//
+// The function definitions and implementations follow that of the netlib reference
+// implementation. See http://www.netlib.org/lapack/explore-html/ for more
+// information, and http://www.netlib.org/lapack/explore-html/d4/de1/_l_i_c_e_n_s_e_source.html
+// for more license information.
+//
+// Slice function arguments frequently represent vectors and matrices. The data
+// layout is identical to that found in https://pkg.go.dev/gonum.org/v1/gonum/blas/gonum.
+//
+// Most LAPACK functions are built on top of the routines defined in the BLAS API,
+// and as such the computation time for many LAPACK functions is
+// dominated by BLAS calls. Here, BLAS is accessed through the
+// blas64 package (https://pkg.go.dev/gonum.org/v1/gonum/blas/blas64). In particular,
+// this implies that an external BLAS library will be used if it is
+// registered in blas64.
+//
+// The full LAPACK capability has not been implemented at present. The full
+// API is very large, containing approximately 200 functions for double precision
+// alone. Future additions will be focused on supporting the Gonum matrix
+// package (https://pkg.go.dev/gonum.org/v1/gonum/mat), though pull requests
+// with implementations and tests for LAPACK functions are encouraged.
+package gonum // import "gonum.org/v1/gonum/lapack/gonum"
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dorg2l.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dorg2l.go
new file mode 100644
index 0000000000..fdb37af2a7
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorg2l.go
@@ -0,0 +1,78 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dorg2l generates an m×n matrix Q with orthonormal columns which is defined
+// as the last n columns of a product of k elementary reflectors of order m.
+//
+//	Q = H_{k-1} * ... * H_1 * H_0
+//
+// See Dgeqlf for more information. It must be that m >= n >= k.
+//
+// tau contains the scalar reflectors computed by Dgeqlf. tau must have length
+// at least k, and Dorg2l will panic otherwise.
+//
+// work contains temporary memory, and must have length at least n. Dorg2l will
+// panic otherwise.
+//
+// Dorg2l is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorg2l(m, n, k int, a []float64, lda int, tau, work []float64) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case n > m:
+		panic(nGTM)
+	case k < 0:
+		panic(kLT0)
+	case k > n:
+		panic(kGTN)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	if n == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	case len(work) < n:
+		panic(shortWork)
+	}
+
+	// Initialize columns 0:n-k to columns of the unit matrix.
+	for j := 0; j < n-k; j++ {
+		for l := 0; l < m; l++ {
+			a[l*lda+j] = 0
+		}
+		a[(m-n+j)*lda+j] = 1
+	}
+
+	bi := blas64.Implementation()
+	for i := 0; i < k; i++ {
+		ii := n - k + i
+
+		// Apply H_i to A[0:m-k+i, 0:n-k+i] from the left.
+		a[(m-n+ii)*lda+ii] = 1
+		impl.Dlarf(blas.Left, m-n+ii+1, ii, a[ii:], lda, tau[i], a, lda, work)
+		bi.Dscal(m-n+ii, -tau[i], a[ii:], lda)
+		a[(m-n+ii)*lda+ii] = 1 - tau[i]
+
+		// Set A[m-k+i:m, n-k+i+1] to zero.
+		for l := m - n + ii + 1; l < m; l++ {
+			a[l*lda+ii] = 0
+		}
+	}
+}
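Dorg2l (and the Dorg* routines that follow) assemble Q by repeatedly applying elementary reflectors H = I - tau*v*vᵀ. A self-contained sketch of that single building block (the helper applyReflector is illustrative, not part of gonum):

	package main

	import "fmt"

	// applyReflector overwrites x with (I - tau*v*vᵀ)*x, the action of one
	// elementary reflector.
	func applyReflector(tau float64, v, x []float64) {
		var dot float64
		for i := range v {
			dot += v[i] * x[i]
		}
		for i := range v {
			x[i] -= tau * dot * v[i]
		}
	}

	func main() {
		// With v = [1, 0] and tau = 2, H = I - 2*v*vᵀ negates the first
		// component of x.
		x := []float64{3, 4}
		applyReflector(2, []float64{1, 0}, x)
		fmt.Println(x) // [-3 4]
	}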
+func (impl Implementation) Dorg2r(m, n, k int, a []float64, lda int, tau []float64, work []float64) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case n > m:
+		panic(nGTM)
+	case k < 0:
+		panic(kLT0)
+	case k > n:
+		panic(kGTN)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	if n == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(tau) != k:
+		panic(badLenTau)
+	case len(work) < n:
+		panic(shortWork)
+	}
+
+	bi := blas64.Implementation()
+
+	// Initialize columns k+1:n to columns of the unit matrix.
+	for l := 0; l < m; l++ {
+		for j := k; j < n; j++ {
+			a[l*lda+j] = 0
+		}
+	}
+	for j := k; j < n; j++ {
+		a[j*lda+j] = 1
+	}
+	for i := k - 1; i >= 0; i-- {
+		for i := range work {
+			work[i] = 0
+		}
+		if i < n-1 {
+			a[i*lda+i] = 1
+			impl.Dlarf(blas.Left, m-i, n-i-1, a[i*lda+i:], lda, tau[i], a[i*lda+i+1:], lda, work)
+		}
+		if i < m-1 {
+			bi.Dscal(m-i-1, -tau[i], a[(i+1)*lda+i:], lda)
+		}
+		a[i*lda+i] = 1 - tau[i]
+		for l := 0; l < i; l++ {
+			a[l*lda+i] = 0
+		}
+	}
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dorgbr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgbr.go
new file mode 100644
index 0000000000..35535100b6
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgbr.go
@@ -0,0 +1,138 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/lapack"
+
+// Dorgbr generates one of the matrices Q or Pᵀ from the bidiagonal
+// decomposition computed by Dgebrd. See Dgebd2 for the description of
+// Q and Pᵀ.
+//
+// If vect == lapack.GenerateQ, then a is assumed to have been an m×k matrix and
+// Q is of order m. If m >= k, then Dorgbr returns the first n columns of Q
+// where m >= n >= k. If m < k, then Dorgbr returns Q as an m×m matrix.
+//
+// If vect == lapack.GeneratePT, then A is assumed to have been a k×n matrix, and
+// Pᵀ is of order n. If k < n, then Dorgbr returns the first m rows of Pᵀ,
+// where n >= m >= k. If k >= n, then Dorgbr returns Pᵀ as an n×n matrix.
+//
+// Dorgbr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorgbr(vect lapack.GenOrtho, m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
+	wantq := vect == lapack.GenerateQ
+	mn := min(m, n)
+	switch {
+	case vect != lapack.GenerateQ && vect != lapack.GeneratePT:
+		panic(badGenOrtho)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case wantq && n > m:
+		panic(nGTM)
+	case wantq && n < min(m, k):
+		panic("lapack: n < min(m,k)")
+	case !wantq && m > n:
+		panic(mGTN)
+	case !wantq && m < min(n, k):
+		panic("lapack: m < min(n,k)")
+	case lda < max(1, n) && lwork != -1:
+		// Normally, we follow the reference and require the leading
+		// dimension to be always valid, even in case of workspace
+		// queries. However, if a caller provided a placeholder value
+		// for lda (and a) when doing a workspace query that didn't
+		// fulfill the condition here, it would cause a panic. This is
+		// exactly what Dgesvd does.
+		panic(badLdA)
+	case lwork < max(1, mn) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+ work[0] = 1 + if m == 0 || n == 0 { + return + } + + if wantq { + if m >= k { + impl.Dorgqr(m, n, k, a, lda, tau, work, -1) + } else if m > 1 { + impl.Dorgqr(m-1, m-1, m-1, a[lda+1:], lda, tau, work, -1) + } + } else { + if k < n { + impl.Dorglq(m, n, k, a, lda, tau, work, -1) + } else if n > 1 { + impl.Dorglq(n-1, n-1, n-1, a[lda+1:], lda, tau, work, -1) + } + } + lworkopt := int(work[0]) + lworkopt = max(lworkopt, mn) + if lwork == -1 { + work[0] = float64(lworkopt) + return + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case wantq && len(tau) < min(m, k): + panic(shortTau) + case !wantq && len(tau) < min(n, k): + panic(shortTau) + } + + if wantq { + // Form Q, determined by a call to Dgebrd to reduce an m×k matrix. + if m >= k { + impl.Dorgqr(m, n, k, a, lda, tau[:k], work, lwork) + } else { + // Shift the vectors which define the elementary reflectors one + // column to the right, and set the first row and column of Q to + // those of the unit matrix. + for j := m - 1; j >= 1; j-- { + a[j] = 0 + for i := j + 1; i < m; i++ { + a[i*lda+j] = a[i*lda+j-1] + } + } + a[0] = 1 + for i := 1; i < m; i++ { + a[i*lda] = 0 + } + if m > 1 { + // Form Q[1:m-1, 1:m-1] + impl.Dorgqr(m-1, m-1, m-1, a[lda+1:], lda, tau[:m-1], work, lwork) + } + } + } else { + // Form Pᵀ, determined by a call to Dgebrd to reduce a k×n matrix. + if k < n { + impl.Dorglq(m, n, k, a, lda, tau, work, lwork) + } else { + // Shift the vectors which define the elementary reflectors one + // row downward, and set the first row and column of Pᵀ to + // those of the unit matrix. + a[0] = 1 + for i := 1; i < n; i++ { + a[i*lda] = 0 + } + for j := 1; j < n; j++ { + for i := j - 1; i >= 1; i-- { + a[i*lda+j] = a[(i-1)*lda+j] + } + a[j] = 0 + } + if n > 1 { + impl.Dorglq(n-1, n-1, n-1, a[lda+1:], lda, tau, work, lwork) + } + } + } + work[0] = float64(lworkopt) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dorghr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dorghr.go new file mode 100644 index 0000000000..8f0dd452ec --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorghr.go @@ -0,0 +1,103 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +// Dorghr generates an n×n orthogonal matrix Q which is defined as the product +// of ihi-ilo elementary reflectors: +// +// Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}. +// +// a and lda represent an n×n matrix that contains the elementary reflectors, as +// returned by Dgehrd. On return, a is overwritten by the n×n orthogonal matrix +// Q. Q will be equal to the identity matrix except in the submatrix +// Q[ilo+1:ihi+1,ilo+1:ihi+1]. +// +// ilo and ihi must have the same values as in the previous call of Dgehrd. It +// must hold that +// +// 0 <= ilo <= ihi < n if n > 0, +// ilo = 0, ihi = -1 if n == 0. +// +// tau contains the scalar factors of the elementary reflectors, as returned by +// Dgehrd. tau must have length n-1. +// +// work must have length at least max(1,lwork) and lwork must be at least +// ihi-ilo. For optimum performance lwork must be at least (ihi-ilo)*nb where nb +// is the optimal blocksize. On return, work[0] will contain the optimal value +// of lwork. +// +// If lwork == -1, instead of performing Dorghr, only the optimal value of lwork +// will be stored into work[0]. +// +// If any requirement on input sizes is not met, Dorghr will panic. +// +// Dorghr is an internal routine. 
It is exported for testing purposes. +func (impl Implementation) Dorghr(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) { + nh := ihi - ilo + switch { + case ilo < 0 || max(1, n) <= ilo: + panic(badIlo) + case ihi < min(ilo, n-1) || n <= ihi: + panic(badIhi) + case lda < max(1, n): + panic(badLdA) + case lwork < max(1, nh) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. + if n == 0 { + work[0] = 1 + return + } + + lwkopt := max(1, nh) * impl.Ilaenv(1, "DORGQR", " ", nh, nh, nh, -1) + if lwork == -1 { + work[0] = float64(lwkopt) + return + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(tau) < n-1: + panic(shortTau) + } + + // Shift the vectors which define the elementary reflectors one column + // to the right. + for i := ilo + 2; i < ihi+1; i++ { + copy(a[i*lda+ilo+1:i*lda+i], a[i*lda+ilo:i*lda+i-1]) + } + // Set the first ilo+1 and the last n-ihi-1 rows and columns to those of + // the identity matrix. + for i := 0; i < ilo+1; i++ { + for j := 0; j < n; j++ { + a[i*lda+j] = 0 + } + a[i*lda+i] = 1 + } + for i := ilo + 1; i < ihi+1; i++ { + for j := 0; j <= ilo; j++ { + a[i*lda+j] = 0 + } + for j := i; j < n; j++ { + a[i*lda+j] = 0 + } + } + for i := ihi + 1; i < n; i++ { + for j := 0; j < n; j++ { + a[i*lda+j] = 0 + } + a[i*lda+i] = 1 + } + if nh > 0 { + // Generate Q[ilo+1:ihi+1,ilo+1:ihi+1]. + impl.Dorgqr(nh, nh, nh, a[(ilo+1)*lda+ilo+1:], lda, tau[ilo:ihi], work, lwork) + } + work[0] = float64(lwkopt) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dorgl2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgl2.go new file mode 100644 index 0000000000..6dd9a88863 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgl2.go @@ -0,0 +1,79 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dorgl2 generates an m×n matrix Q with orthonormal rows defined as the first m +// rows of a product of k elementary reflectors of order n +// +// Q = H_{k-1} * ... * H_0 +// +// as returned by Dgelqf. +// +// On entry, tau and the first k rows of A must contain the scalar factors and +// the vectors, respectively, which define the elementary reflectors H_i, +// i=0,...,k-1, as returned by Dgelqf. On return, A contains the matrix Q. +// +// tau must have length at least k, work must have length at least m, and it +// must hold that 0 <= k <= m <= n, otherwise Dorgl2 will panic. +// +// Dorgl2 is an internal routine. It is exported for testing purposes. 
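+//
+// An illustrative pairing (assumed here, not upstream documentation), with
+// m <= n and k = min(m, n) = m: factor with Dgelqf, then overwrite the first
+// m rows of a with the rows of Q. The blocked driver Dorglq is normally
+// preferred over calling Dorgl2 directly:
+//
+//	impl.Dgelqf(m, n, a, lda, tau, work, lwork) // A = L * Q
+//	impl.Dorgl2(m, n, m, a, lda, tau, work2)    // len(work2) >= m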
+func (impl Implementation) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) { + switch { + case m < 0: + panic(mLT0) + case n < m: + panic(nLTM) + case k < 0: + panic(kLT0) + case k > m: + panic(kGTM) + case lda < max(1, n): + panic(badLdA) + } + + if m == 0 { + return + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(tau) < k: + panic(shortTau) + case len(work) < m: + panic(shortWork) + } + + bi := blas64.Implementation() + + if k < m { + for i := k; i < m; i++ { + for j := 0; j < n; j++ { + a[i*lda+j] = 0 + } + } + for j := k; j < m; j++ { + a[j*lda+j] = 1 + } + } + for i := k - 1; i >= 0; i-- { + if i < n-1 { + if i < m-1 { + a[i*lda+i] = 1 + impl.Dlarf(blas.Right, m-i-1, n-i, a[i*lda+i:], 1, tau[i], a[(i+1)*lda+i:], lda, work) + } + bi.Dscal(n-i-1, -tau[i], a[i*lda+i+1:], 1) + } + a[i*lda+i] = 1 - tau[i] + for l := 0; l < i; l++ { + a[i*lda+l] = 0 + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dorglq.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dorglq.go new file mode 100644 index 0000000000..d6b3aadfca --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorglq.go @@ -0,0 +1,125 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dorglq generates an m×n matrix Q with orthonormal rows defined as the first m +// rows of a product of k elementary reflectors of order n +// +// Q = H_{k-1} * ... * H_0 +// +// as returned by Dgelqf. +// +// On entry, tau and the first k rows of A must contain the scalar factors and +// the vectors, respectively, which define the elementary reflectors H_i, +// i=0,...,k-1, as returned by Dgelqf. On return, A contains the matrix Q. +// +// tau must have length at least k, work must have length at least lwork and +// lwork must be at least max(1,m). On return, optimal value of lwork will be +// stored in work[0]. It must also hold that 0 <= k <= m <= n, otherwise Dorglq +// will panic. +// +// If lwork == -1, instead of performing Dorglq, the function only calculates +// the optimal value of lwork and stores it into work[0]. +func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) { + switch { + case m < 0: + panic(mLT0) + case n < m: + panic(nLTM) + case k < 0: + panic(kLT0) + case k > m: + panic(kGTM) + case lda < max(1, n): + panic(badLdA) + case lwork < max(1, m) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + if m == 0 { + work[0] = 1 + return + } + + nb := impl.Ilaenv(1, "DORGLQ", " ", m, n, k, -1) + if lwork == -1 { + work[0] = float64(m * nb) + return + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(tau) < k: + panic(shortTau) + } + + nbmin := 2 // Minimum block size + var nx int // Crossover size from blocked to unblocked code + iws := m // Length of work needed + var ldwork int + if 1 < nb && nb < k { + nx = max(0, impl.Ilaenv(3, "DORGLQ", " ", m, n, k, -1)) + if nx < k { + ldwork = nb + iws = m * ldwork + if lwork < iws { + nb = lwork / m + ldwork = nb + nbmin = max(2, impl.Ilaenv(2, "DORGLQ", " ", m, n, k, -1)) + } + } + } + + var ki, kk int + if nbmin <= nb && nb < k && nx < k { + // The first kk rows are handled by the blocked method. 
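+		// (ki is the starting row index of the last block of the
+		// backward blocked sweep below; rows kk:m are first formed by
+		// the unblocked Dorgl2 call, then the blocks are applied from
+		// ki down to 0.)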
+ ki = ((k - nx - 1) / nb) * nb + kk = min(k, ki+nb) + for i := kk; i < m; i++ { + for j := 0; j < kk; j++ { + a[i*lda+j] = 0 + } + } + } + if kk < m { + // Perform the operation on columns kk to the end. + impl.Dorgl2(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work) + } + if kk > 0 { + // Perform the operation on column-blocks + for i := ki; i >= 0; i -= nb { + ib := min(nb, k-i) + if i+ib < m { + impl.Dlarft(lapack.Forward, lapack.RowWise, + n-i, ib, + a[i*lda+i:], lda, + tau[i:], + work, ldwork) + + impl.Dlarfb(blas.Right, blas.Trans, lapack.Forward, lapack.RowWise, + m-i-ib, n-i, ib, + a[i*lda+i:], lda, + work, ldwork, + a[(i+ib)*lda+i:], lda, + work[ib*ldwork:], ldwork) + } + impl.Dorgl2(ib, n-i, ib, a[i*lda+i:], lda, tau[i:], work) + for l := i; l < i+ib; l++ { + for j := 0; j < i; j++ { + a[l*lda+j] = 0 + } + } + } + } + work[0] = float64(iws) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dorgql.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgql.go new file mode 100644 index 0000000000..d5ef17f3b6 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgql.go @@ -0,0 +1,139 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dorgql generates the m×n matrix Q with orthonormal columns defined as the +// last n columns of a product of k elementary reflectors of order m +// +// Q = H_{k-1} * ... * H_1 * H_0. +// +// It must hold that +// +// 0 <= k <= n <= m, +// +// and Dorgql will panic otherwise. +// +// On entry, the (n-k+i)-th column of A must contain the vector which defines +// the elementary reflector H_i, for i=0,...,k-1, and tau[i] must contain its +// scalar factor. On return, a contains the m×n matrix Q. +// +// tau must have length at least k, and Dorgql will panic otherwise. +// +// work must have length at least max(1,lwork), and lwork must be at least +// max(1,n), otherwise Dorgql will panic. For optimum performance lwork must +// be a sufficiently large multiple of n. +// +// If lwork == -1, instead of computing Dorgql the optimal work length is stored +// into work[0]. +// +// Dorgql is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dorgql(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case n > m: + panic(nGTM) + case k < 0: + panic(kLT0) + case k > n: + panic(kGTN) + case lda < max(1, n): + panic(badLdA) + case lwork < max(1, n) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. + if n == 0 { + work[0] = 1 + return + } + + nb := impl.Ilaenv(1, "DORGQL", " ", m, n, k, -1) + if lwork == -1 { + work[0] = float64(n * nb) + return + } + + switch { + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(tau) < k: + panic(shortTau) + } + + nbmin := 2 + var nx, ldwork int + iws := n + if 1 < nb && nb < k { + // Determine when to cross over from blocked to unblocked code. + nx = max(0, impl.Ilaenv(3, "DORGQL", " ", m, n, k, -1)) + if nx < k { + // Determine if workspace is large enough for blocked code. + iws = n * nb + if lwork < iws { + // Not enough workspace to use optimal nb: reduce nb and determine + // the minimum value of nb. 
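+				// (work is used as an n×nb matrix, so the
+				// largest block size that fits in lwork is
+				// lwork/n.)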
+ nb = lwork / n + nbmin = max(2, impl.Ilaenv(2, "DORGQL", " ", m, n, k, -1)) + } + ldwork = nb + } + } + + var kk int + if nbmin <= nb && nb < k && nx < k { + // Use blocked code after the first block. The last kk columns are handled + // by the block method. + kk = min(k, ((k-nx+nb-1)/nb)*nb) + + // Set A(m-kk:m, 0:n-kk) to zero. + for i := m - kk; i < m; i++ { + for j := 0; j < n-kk; j++ { + a[i*lda+j] = 0 + } + } + } + + // Use unblocked code for the first or only block. + impl.Dorg2l(m-kk, n-kk, k-kk, a, lda, tau, work) + if kk > 0 { + // Use blocked code. + for i := k - kk; i < k; i += nb { + ib := min(nb, k-i) + if n-k+i > 0 { + // Form the triangular factor of the block reflector + // H = H_{i+ib-1} * ... * H_{i+1} * H_i. + impl.Dlarft(lapack.Backward, lapack.ColumnWise, m-k+i+ib, ib, + a[n-k+i:], lda, tau[i:], work, ldwork) + + // Apply H to A[0:m-k+i+ib, 0:n-k+i] from the left. + impl.Dlarfb(blas.Left, blas.NoTrans, lapack.Backward, lapack.ColumnWise, + m-k+i+ib, n-k+i, ib, a[n-k+i:], lda, work, ldwork, + a, lda, work[ib*ldwork:], ldwork) + } + + // Apply H to rows 0:m-k+i+ib of current block. + impl.Dorg2l(m-k+i+ib, ib, ib, a[n-k+i:], lda, tau[i:], work) + + // Set rows m-k+i+ib:m of current block to zero. + for j := n - k + i; j < n-k+i+ib; j++ { + for l := m - k + i + ib; l < m; l++ { + a[l*lda+j] = 0 + } + } + } + } + work[0] = float64(iws) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dorgqr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgqr.go new file mode 100644 index 0000000000..a1e0fa8716 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgqr.go @@ -0,0 +1,136 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dorgqr generates an m×n matrix Q with orthonormal columns defined by the +// product of elementary reflectors +// +// Q = H_0 * H_1 * ... * H_{k-1} +// +// as computed by Dgeqrf. +// Dorgqr is the blocked version of Dorg2r that makes greater use of level-3 BLAS +// routines. +// +// The length of tau must be k, and the length of work must be at least n. +// It also must be that 0 <= k <= n and 0 <= n <= m. +// +// work is temporary storage, and lwork specifies the usable memory length. At +// minimum, lwork >= n, and the amount of blocking is limited by the usable +// length. If lwork == -1, instead of computing Dorgqr the optimal work length +// is stored into work[0]. +// +// Dorgqr will panic if the conditions on input values are not met. +// +// Dorgqr is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case n > m: + panic(nGTM) + case k < 0: + panic(kLT0) + case k > n: + panic(kGTN) + case lda < max(1, n) && lwork != -1: + // Normally, we follow the reference and require the leading + // dimension to be always valid, even in case of workspace + // queries. However, if a caller provided a placeholder value + // for lda (and a) when doing a workspace query that didn't + // fulfill the condition here, it would cause a panic. This is + // exactly what Dgesvd does. 
+		panic(badLdA)
+	case lwork < max(1, n) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	if n == 0 {
+		work[0] = 1
+		return
+	}
+
+	nb := impl.Ilaenv(1, "DORGQR", " ", m, n, k, -1)
+	// work is treated as an n×nb matrix
+	if lwork == -1 {
+		work[0] = float64(n * nb)
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(tau) != k:
+		panic(badLenTau)
+	}
+
+	nbmin := 2 // Minimum block size
+	var nx int // Crossover size from blocked to unblocked code
+	iws := n   // Length of work needed
+	var ldwork int
+	if 1 < nb && nb < k {
+		nx = max(0, impl.Ilaenv(3, "DORGQR", " ", m, n, k, -1))
+		if nx < k {
+			ldwork = nb
+			iws = n * ldwork
+			if lwork < iws {
+				nb = lwork / n
+				ldwork = nb
+				nbmin = max(2, impl.Ilaenv(2, "DORGQR", " ", m, n, k, -1))
+			}
+		}
+	}
+	var ki, kk int
+	if nbmin <= nb && nb < k && nx < k {
+		// The first kk columns are handled by the blocked method.
+		ki = ((k - nx - 1) / nb) * nb
+		kk = min(k, ki+nb)
+		for i := 0; i < kk; i++ {
+			for j := kk; j < n; j++ {
+				a[i*lda+j] = 0
+			}
+		}
+	}
+	if kk < n {
+		// Perform the operation on columns kk to the end.
+		impl.Dorg2r(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work)
+	}
+	if kk > 0 {
+		// Perform the operation on column-blocks.
+		for i := ki; i >= 0; i -= nb {
+			ib := min(nb, k-i)
+			if i+ib < n {
+				impl.Dlarft(lapack.Forward, lapack.ColumnWise,
+					m-i, ib,
+					a[i*lda+i:], lda,
+					tau[i:],
+					work, ldwork)
+
+				impl.Dlarfb(blas.Left, blas.NoTrans, lapack.Forward, lapack.ColumnWise,
+					m-i, n-i-ib, ib,
+					a[i*lda+i:], lda,
+					work, ldwork,
+					a[i*lda+i+ib:], lda,
+					work[ib*ldwork:], ldwork)
+			}
+			impl.Dorg2r(m-i, ib, ib, a[i*lda+i:], lda, tau[i:i+ib], work)
+			// Set rows 0:i-1 of current block to zero.
+			for j := i; j < i+ib; j++ {
+				for l := 0; l < i; l++ {
+					a[l*lda+j] = 0
+				}
+			}
+		}
+	}
+	work[0] = float64(iws)
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dorgr2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgr2.go
new file mode 100644
index 0000000000..6f2790cb8f
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgr2.go
@@ -0,0 +1,83 @@
+// Copyright ©2021 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dorgr2 generates an m×n real matrix Q with orthonormal rows, which is defined
+// as the last m rows of a product of k elementary reflectors of order n
+//
+//	Q = H_0 * H_1 * ... * H_{k-1}
+//
+// as returned by Dgerqf.
+//
+// On entry, the (m-k+i)-th row of A must contain the vector which defines the
+// elementary reflector H_i, for i = 0,1,...,k-1, as returned by Dgerqf. On
+// return, A will contain the m×n matrix Q.
+//
+// The i-th element of tau must contain the scalar factor of the elementary
+// reflector H_i, as returned by Dgerqf.
+//
+// It must hold that
+//
+//	n >= m >= k >= 0,
+//
+// the length of tau must be k and the length of work must be m, otherwise
+// Dorgr2 will panic.
+//
+// Dorgr2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorgr2(m, n, k int, a []float64, lda int, tau, work []float64) {
+	switch {
+	case k < 0:
+		panic(kLT0)
+	case m < k:
+		panic(kGTM)
+	case n < m:
+		panic(mGTN)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
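+	// (m == 0 means Q has no rows, so there is nothing to form.)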
+ if m == 0 { + return + } + + switch { + case len(tau) != k: + panic(badLenTau) + case len(a) < (m-1)*lda+n: + panic(shortA) + case len(work) < m: + panic(shortWork) + } + + // Initialise rows 0:m-k to rows of the unit matrix. + for l := 0; l < m-k; l++ { + row := a[l*lda : l*lda+n] + for j := range row { + row[j] = 0 + } + a[l*lda+n-m+l] = 1 + } + bi := blas64.Implementation() + for i := 0; i < k; i++ { + ii := m - k + i + + // Apply H_i to A[0:m-k+i+1, 0:n-k+i+1] from the right. + a[ii*lda+n-m+ii] = 1 + impl.Dlarf(blas.Right, ii, n-m+ii+1, a[ii*lda:], 1, tau[i], a, lda, work) + bi.Dscal(n-m+ii, -tau[i], a[ii*lda:], 1) + a[ii*lda+n-m+ii] = 1 - tau[i] + + // Set A[m-k+i, n-k+i:n] to zero. + for l := n - m + ii + 1; l < n; l++ { + a[ii*lda+l] = 0 + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dorgtr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgtr.go new file mode 100644 index 0000000000..7021ae53d3 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgtr.go @@ -0,0 +1,106 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dorgtr generates a real orthogonal matrix Q which is defined as the product +// of n-1 elementary reflectors of order n as returned by Dsytrd. +// +// The construction of Q depends on the value of uplo: +// +// Q = H_{n-1} * ... * H_1 * H_0 if uplo == blas.Upper +// Q = H_0 * H_1 * ... * H_{n-1} if uplo == blas.Lower +// +// where H_i is constructed from the elementary reflectors as computed by Dsytrd. +// See the documentation for Dsytrd for more information. +// +// tau must have length at least n-1, and Dorgtr will panic otherwise. +// +// work is temporary storage, and lwork specifies the usable memory length. At +// minimum, lwork >= max(1,n-1), and Dorgtr will panic otherwise. The amount of blocking +// is limited by the usable length. +// If lwork == -1, instead of computing Dorgtr the optimal work length is stored +// into work[0]. +// +// Dorgtr is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dorgtr(uplo blas.Uplo, n int, a []float64, lda int, tau, work []float64, lwork int) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case lwork < max(1, n-1) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + if n == 0 { + work[0] = 1 + return + } + + var nb int + if uplo == blas.Upper { + nb = impl.Ilaenv(1, "DORGQL", " ", n-1, n-1, n-1, -1) + } else { + nb = impl.Ilaenv(1, "DORGQR", " ", n-1, n-1, n-1, -1) + } + lworkopt := max(1, n-1) * nb + if lwork == -1 { + work[0] = float64(lworkopt) + return + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(tau) < n-1: + panic(shortTau) + } + + if uplo == blas.Upper { + // Q was determined by a call to Dsytrd with uplo == blas.Upper. + // Shift the vectors which define the elementary reflectors one column + // to the left, and set the last row and column of Q to those of the unit + // matrix. + for j := 0; j < n-1; j++ { + for i := 0; i < j; i++ { + a[i*lda+j] = a[i*lda+j+1] + } + a[(n-1)*lda+j] = 0 + } + for i := 0; i < n-1; i++ { + a[i*lda+n-1] = 0 + } + a[(n-1)*lda+n-1] = 1 + + // Generate Q[0:n-1, 0:n-1]. 
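+		// (With uplo == blas.Upper the reflectors from Dsytrd compose
+		// in QL order, Q = H_{n-1} * ... * H_0, so the leading
+		// (n-1)×(n-1) block is formed with Dorgql rather than Dorgqr.)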
+		impl.Dorgql(n-1, n-1, n-1, a, lda, tau, work, lwork)
+	} else {
+		// Q was determined by a call to Dsytrd with uplo == blas.Lower.
+		// Shift the vectors which define the elementary reflectors one column
+		// to the right, and set the first row and column of Q to those of the unit
+		// matrix.
+		for j := n - 1; j > 0; j-- {
+			a[j] = 0
+			for i := j + 1; i < n; i++ {
+				a[i*lda+j] = a[i*lda+j-1]
+			}
+		}
+		a[0] = 1
+		for i := 1; i < n; i++ {
+			a[i*lda] = 0
+		}
+		if n > 1 {
+			// Generate Q[1:n, 1:n].
+			impl.Dorgqr(n-1, n-1, n-1, a[lda+1:], lda, tau[:n-1], work, lwork)
+		}
+	}
+	work[0] = float64(lworkopt)
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dorm2r.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dorm2r.go
new file mode 100644
index 0000000000..aea77a70d2
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorm2r.go
@@ -0,0 +1,103 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dorm2r multiplies a general matrix C by an orthogonal matrix from a QR factorization
+// determined by Dgeqrf.
+//
+//	C = Q * C   if side == blas.Left and trans == blas.NoTrans
+//	C = Qᵀ * C  if side == blas.Left and trans == blas.Trans
+//	C = C * Q   if side == blas.Right and trans == blas.NoTrans
+//	C = C * Qᵀ  if side == blas.Right and trans == blas.Trans
+//
+// If side == blas.Left, a is a matrix of size m×k, and if side == blas.Right
+// a is of size n×k.
+//
+// tau contains the Householder factors and must have length k and this function
+// will panic otherwise.
+//
+// work is temporary storage of length at least n if side == blas.Left
+// and at least m if side == blas.Right and this function will panic otherwise.
+//
+// Dorm2r is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorm2r(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) {
+	left := side == blas.Left
+	switch {
+	case !left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.Trans && trans != blas.NoTrans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case left && k > m:
+		panic(kGTM)
+	case !left && k > n:
+		panic(kGTN)
+	case lda < max(1, k):
+		panic(badLdA)
+	case ldc < max(1, n):
+		panic(badLdC)
+	}
+
+	// Quick return if possible.
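+	// (m == 0 or n == 0 means C is empty, and k == 0 means Q is the
+	// identity; in either case C is unchanged.)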
+ if m == 0 || n == 0 || k == 0 { + return + } + + switch { + case left && len(a) < (m-1)*lda+k: + panic(shortA) + case !left && len(a) < (n-1)*lda+k: + panic(shortA) + case len(c) < (m-1)*ldc+n: + panic(shortC) + case len(tau) != k: + panic(badLenTau) + case left && len(work) < n: + panic(shortWork) + case !left && len(work) < m: + panic(shortWork) + } + + if left { + if trans == blas.NoTrans { + for i := k - 1; i >= 0; i-- { + aii := a[i*lda+i] + a[i*lda+i] = 1 + impl.Dlarf(side, m-i, n, a[i*lda+i:], lda, tau[i], c[i*ldc:], ldc, work) + a[i*lda+i] = aii + } + return + } + for i := 0; i < k; i++ { + aii := a[i*lda+i] + a[i*lda+i] = 1 + impl.Dlarf(side, m-i, n, a[i*lda+i:], lda, tau[i], c[i*ldc:], ldc, work) + a[i*lda+i] = aii + } + return + } + if trans == blas.NoTrans { + for i := 0; i < k; i++ { + aii := a[i*lda+i] + a[i*lda+i] = 1 + impl.Dlarf(side, m, n-i, a[i*lda+i:], lda, tau[i], c[i:], ldc, work) + a[i*lda+i] = aii + } + return + } + for i := k - 1; i >= 0; i-- { + aii := a[i*lda+i] + a[i*lda+i] = 1 + impl.Dlarf(side, m, n-i, a[i*lda+i:], lda, tau[i], c[i:], ldc, work) + a[i*lda+i] = aii + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dormbr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dormbr.go new file mode 100644 index 0000000000..8be7040c92 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dormbr.go @@ -0,0 +1,180 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dormbr applies a multiplicative update to the matrix C based on a +// decomposition computed by Dgebrd. +// +// Dormbr overwrites the m×n matrix C with +// +// Q * C if vect == lapack.ApplyQ, side == blas.Left, and trans == blas.NoTrans +// C * Q if vect == lapack.ApplyQ, side == blas.Right, and trans == blas.NoTrans +// Qᵀ * C if vect == lapack.ApplyQ, side == blas.Left, and trans == blas.Trans +// C * Qᵀ if vect == lapack.ApplyQ, side == blas.Right, and trans == blas.Trans +// +// P * C if vect == lapack.ApplyP, side == blas.Left, and trans == blas.NoTrans +// C * P if vect == lapack.ApplyP, side == blas.Right, and trans == blas.NoTrans +// Pᵀ * C if vect == lapack.ApplyP, side == blas.Left, and trans == blas.Trans +// C * Pᵀ if vect == lapack.ApplyP, side == blas.Right, and trans == blas.Trans +// +// where P and Q are the orthogonal matrices determined by Dgebrd when reducing +// a matrix A to bidiagonal form: A = Q * B * Pᵀ. See Dgebrd for the +// definitions of Q and P. +// +// If vect == lapack.ApplyQ, A is assumed to have been an nq×k matrix, while if +// vect == lapack.ApplyP, A is assumed to have been a k×nq matrix. nq = m if +// side == blas.Left, while nq = n if side == blas.Right. +// +// tau must have length min(nq,k), and Dormbr will panic otherwise. tau contains +// the elementary reflectors to construct Q or P depending on the value of +// vect. +// +// work must have length at least max(1,lwork), and lwork must be either -1 or +// at least max(1,n) if side == blas.Left, and at least max(1,m) if side == +// blas.Right. For optimum performance lwork should be at least n*nb if side == +// blas.Left, and at least m*nb if side == blas.Right, where nb is the optimal +// block size. On return, work[0] will contain the optimal value of lwork. +// +// If lwork == -1, the function only calculates the optimal value of lwork and +// returns it in work[0]. 
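+//
+// An illustrative sketch (assumed here, not upstream documentation): after a
+// bidiagonal reduction of an m×n matrix held in a,
+//
+//	impl.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, lwork)
+//
+// the update C ← Qᵀ * C of an m×nc matrix C (nc, c and ldc being the caller's)
+// would be applied with
+//
+//	impl.Dormbr(lapack.ApplyQ, blas.Left, blas.Trans, m, nc, n, a, lda, tauQ, c, ldc, work, lwork)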
+// +// Dormbr is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dormbr(vect lapack.ApplyOrtho, side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) { + nq := n + nw := m + if side == blas.Left { + nq = m + nw = n + } + applyQ := vect == lapack.ApplyQ + switch { + case !applyQ && vect != lapack.ApplyP: + panic(badApplyOrtho) + case side != blas.Left && side != blas.Right: + panic(badSide) + case trans != blas.NoTrans && trans != blas.Trans: + panic(badTrans) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case applyQ && lda < max(1, min(nq, k)): + panic(badLdA) + case !applyQ && lda < max(1, nq): + panic(badLdA) + case ldc < max(1, n): + panic(badLdC) + case lwork < max(1, nw) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. + if m == 0 || n == 0 { + work[0] = 1 + return + } + + // The current implementation does not use opts, but a future change may + // use these options so construct them. + var opts string + if side == blas.Left { + opts = "L" + } else { + opts = "R" + } + if trans == blas.Trans { + opts += "T" + } else { + opts += "N" + } + var nb int + if applyQ { + if side == blas.Left { + nb = impl.Ilaenv(1, "DORMQR", opts, m-1, n, m-1, -1) + } else { + nb = impl.Ilaenv(1, "DORMQR", opts, m, n-1, n-1, -1) + } + } else { + if side == blas.Left { + nb = impl.Ilaenv(1, "DORMLQ", opts, m-1, n, m-1, -1) + } else { + nb = impl.Ilaenv(1, "DORMLQ", opts, m, n-1, n-1, -1) + } + } + lworkopt := max(1, nw) * nb + if lwork == -1 { + work[0] = float64(lworkopt) + return + } + + minnqk := min(nq, k) + switch { + case applyQ && len(a) < (nq-1)*lda+minnqk: + panic(shortA) + case !applyQ && len(a) < (minnqk-1)*lda+nq: + panic(shortA) + case len(tau) < minnqk: + panic(shortTau) + case len(c) < (m-1)*ldc+n: + panic(shortC) + } + + if applyQ { + // Change the operation to get Q depending on the size of the initial + // matrix to Dgebrd. The size matters due to the storage location of + // the off-diagonal elements. + if nq >= k { + impl.Dormqr(side, trans, m, n, k, a, lda, tau[:k], c, ldc, work, lwork) + } else if nq > 1 { + mi := m + ni := n - 1 + i1 := 0 + i2 := 1 + if side == blas.Left { + mi = m - 1 + ni = n + i1 = 1 + i2 = 0 + } + impl.Dormqr(side, trans, mi, ni, nq-1, a[lda:], lda, tau[:nq-1], c[i1*ldc+i2:], ldc, work, lwork) + } + work[0] = float64(lworkopt) + return + } + + transt := blas.Trans + if trans == blas.Trans { + transt = blas.NoTrans + } + + // Change the operation to get P depending on the size of the initial + // matrix to Dgebrd. The size matters due to the storage location of + // the off-diagonal elements. + if nq > k { + impl.Dormlq(side, transt, m, n, k, a, lda, tau, c, ldc, work, lwork) + } else if nq > 1 { + mi := m + ni := n - 1 + i1 := 0 + i2 := 1 + if side == blas.Left { + mi = m - 1 + ni = n + i1 = 1 + i2 = 0 + } + impl.Dormlq(side, transt, mi, ni, nq-1, a[1:], lda, tau, c[i1*ldc+i2:], ldc, work, lwork) + } + work[0] = float64(lworkopt) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dormhr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dormhr.go new file mode 100644 index 0000000000..318a57adca --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dormhr.go @@ -0,0 +1,134 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dormhr multiplies an m×n general matrix C with an nq×nq orthogonal matrix Q +// +// Q * C if side == blas.Left and trans == blas.NoTrans, +// Qᵀ * C if side == blas.Left and trans == blas.Trans, +// C * Q if side == blas.Right and trans == blas.NoTrans, +// C * Qᵀ if side == blas.Right and trans == blas.Trans, +// +// where nq == m if side == blas.Left and nq == n if side == blas.Right. +// +// Q is defined implicitly as the product of ihi-ilo elementary reflectors, as +// returned by Dgehrd: +// +// Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}. +// +// Q is equal to the identity matrix except in the submatrix +// Q[ilo+1:ihi+1,ilo+1:ihi+1]. +// +// ilo and ihi must have the same values as in the previous call of Dgehrd. It +// must hold that +// +// 0 <= ilo <= ihi < m if m > 0 and side == blas.Left, +// ilo = 0 and ihi = -1 if m = 0 and side == blas.Left, +// 0 <= ilo <= ihi < n if n > 0 and side == blas.Right, +// ilo = 0 and ihi = -1 if n = 0 and side == blas.Right. +// +// a and lda represent an m×m matrix if side == blas.Left and an n×n matrix if +// side == blas.Right. The matrix contains vectors which define the elementary +// reflectors, as returned by Dgehrd. +// +// tau contains the scalar factors of the elementary reflectors, as returned by +// Dgehrd. tau must have length m-1 if side == blas.Left and n-1 if side == +// blas.Right. +// +// c and ldc represent the m×n matrix C. On return, c is overwritten by the +// product with Q. +// +// work must have length at least max(1,lwork), and lwork must be at least +// max(1,n), if side == blas.Left, and max(1,m), if side == blas.Right. For +// optimum performance lwork should be at least n*nb if side == blas.Left and +// m*nb if side == blas.Right, where nb is the optimal block size. On return, +// work[0] will contain the optimal value of lwork. +// +// If lwork == -1, instead of performing Dormhr, only the optimal value of lwork +// will be stored in work[0]. +// +// If any requirement on input sizes is not met, Dormhr will panic. +// +// Dormhr is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dormhr(side blas.Side, trans blas.Transpose, m, n, ilo, ihi int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) { + nq := n // The order of Q. + nw := m // The minimum length of work. + if side == blas.Left { + nq = m + nw = n + } + switch { + case side != blas.Left && side != blas.Right: + panic(badSide) + case trans != blas.NoTrans && trans != blas.Trans: + panic(badTrans) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case ilo < 0 || max(1, nq) <= ilo: + panic(badIlo) + case ihi < min(ilo, nq-1) || nq <= ihi: + panic(badIhi) + case lda < max(1, nq): + panic(badLdA) + case lwork < max(1, nw) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. 
+	if m == 0 || n == 0 {
+		work[0] = 1
+		return
+	}
+
+	nh := ihi - ilo
+	var nb int
+	if side == blas.Left {
+		opts := "LN"
+		if trans == blas.Trans {
+			opts = "LT"
+		}
+		nb = impl.Ilaenv(1, "DORMQR", opts, nh, n, nh, -1)
+	} else {
+		opts := "RN"
+		if trans == blas.Trans {
+			opts = "RT"
+		}
+		nb = impl.Ilaenv(1, "DORMQR", opts, m, nh, nh, -1)
+	}
+	lwkopt := max(1, nw) * nb
+	if lwork == -1 {
+		work[0] = float64(lwkopt)
+		return
+	}
+
+	if nh == 0 {
+		work[0] = 1
+		return
+	}
+
+	switch {
+	case len(a) < (nq-1)*lda+nq:
+		panic(shortA)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	case len(tau) != nq-1:
+		panic(badLenTau)
+	}
+
+	if side == blas.Left {
+		impl.Dormqr(side, trans, nh, n, nh, a[(ilo+1)*lda+ilo:], lda,
+			tau[ilo:ihi], c[(ilo+1)*ldc:], ldc, work, lwork)
+	} else {
+		impl.Dormqr(side, trans, m, nh, nh, a[(ilo+1)*lda+ilo:], lda,
+			tau[ilo:ihi], c[ilo+1:], ldc, work, lwork)
+	}
+	work[0] = float64(lwkopt)
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dorml2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dorml2.go
new file mode 100644
index 0000000000..665e2102c8
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorml2.go
@@ -0,0 +1,104 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dorml2 multiplies a general matrix C by an orthogonal matrix from an LQ factorization
+// determined by Dgelqf.
+//
+//	C = Q * C   if side == blas.Left and trans == blas.NoTrans
+//	C = Qᵀ * C  if side == blas.Left and trans == blas.Trans
+//	C = C * Q   if side == blas.Right and trans == blas.NoTrans
+//	C = C * Qᵀ  if side == blas.Right and trans == blas.Trans
+//
+// If side == blas.Left, a is a matrix of size k×m, and if side == blas.Right
+// a is of size k×n.
+//
+// tau contains the Householder factors and is of length at least k and this function will
+// panic otherwise.
+//
+// work is temporary storage of length at least n if side == blas.Left
+// and at least m if side == blas.Right and this function will panic otherwise.
+//
+// Dorml2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorml2(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) {
+	left := side == blas.Left
+	switch {
+	case !left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.Trans && trans != blas.NoTrans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case left && k > m:
+		panic(kGTM)
+	case !left && k > n:
+		panic(kGTN)
+	case left && lda < max(1, m):
+		panic(badLdA)
+	case !left && lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if m == 0 || n == 0 || k == 0 {
+		return
+	}
+
+	switch {
+	case left && len(a) < (k-1)*lda+m:
+		panic(shortA)
+	case !left && len(a) < (k-1)*lda+n:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	case left && len(work) < n:
+		panic(shortWork)
+	case !left && len(work) < m:
+		panic(shortWork)
+	}
+
+	notrans := trans == blas.NoTrans
+	switch {
+	case left && notrans:
+		for i := 0; i < k; i++ {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m-i, n, a[i*lda+i:], 1, tau[i], c[i*ldc:], ldc, work)
+			a[i*lda+i] = aii
+		}
+
+	case left && !notrans:
+		for i := k - 1; i >= 0; i-- {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m-i, n, a[i*lda+i:], 1, tau[i], c[i*ldc:], ldc, work)
+			a[i*lda+i] = aii
+		}
+
+	case !left && notrans:
+		for i := k - 1; i >= 0; i-- {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m, n-i, a[i*lda+i:], 1, tau[i], c[i:], ldc, work)
+			a[i*lda+i] = aii
+		}
+
+	case !left && !notrans:
+		for i := 0; i < k; i++ {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m, n-i, a[i*lda+i:], 1, tau[i], c[i:], ldc, work)
+			a[i*lda+i] = aii
+		}
+	}
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dormlq.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dormlq.go
new file mode 100644
index 0000000000..37b499739a
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dormlq.go
@@ -0,0 +1,176 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dormlq multiplies the matrix C by the orthogonal matrix Q defined by the
+// slices a and tau. A and tau are as returned from Dgelqf.
+//
+//	C = Q * C   if side == blas.Left and trans == blas.NoTrans
+//	C = Qᵀ * C  if side == blas.Left and trans == blas.Trans
+//	C = C * Q   if side == blas.Right and trans == blas.NoTrans
+//	C = C * Qᵀ  if side == blas.Right and trans == blas.Trans
+//
+// If side == blas.Left, A is a matrix of size k×m, and if side == blas.Right
+// A is of size k×n. This uses a blocked algorithm.
+//
+// work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= n if side == blas.Left and lwork >= m if side == blas.Right,
+// and this function will panic otherwise.
+// Dormlq uses a block algorithm, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Dormlq,
+// the optimal work length will be stored into work[0].
+//
+// tau contains the Householder scales and must have length at least k, and
+// this function will panic otherwise.
+func (impl Implementation) Dormlq(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
+	left := side == blas.Left
+	nw := m
+	if left {
+		nw = n
+	}
+	switch {
+	case !left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.Trans && trans != blas.NoTrans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case left && k > m:
+		panic(kGTM)
+	case !left && k > n:
+		panic(kGTN)
+	case left && lda < max(1, m):
+		panic(badLdA)
+	case !left && lda < max(1, n):
+		panic(badLdA)
+	case lwork < max(1, nw) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+ if m == 0 || n == 0 || k == 0 { + work[0] = 1 + return + } + + const ( + nbmax = 64 + ldt = nbmax + tsize = nbmax * ldt + ) + opts := string(side) + string(trans) + nb := min(nbmax, impl.Ilaenv(1, "DORMLQ", opts, m, n, k, -1)) + lworkopt := max(1, nw)*nb + tsize + if lwork == -1 { + work[0] = float64(lworkopt) + return + } + + switch { + case left && len(a) < (k-1)*lda+m: + panic(shortA) + case !left && len(a) < (k-1)*lda+n: + panic(shortA) + case len(tau) < k: + panic(shortTau) + case len(c) < (m-1)*ldc+n: + panic(shortC) + } + + nbmin := 2 + if 1 < nb && nb < k { + iws := nw*nb + tsize + if lwork < iws { + nb = (lwork - tsize) / nw + nbmin = max(2, impl.Ilaenv(2, "DORMLQ", opts, m, n, k, -1)) + } + } + if nb < nbmin || k <= nb { + // Call unblocked code. + impl.Dorml2(side, trans, m, n, k, a, lda, tau, c, ldc, work) + work[0] = float64(lworkopt) + return + } + + t := work[:tsize] + wrk := work[tsize:] + ldwrk := nb + + notrans := trans == blas.NoTrans + transt := blas.NoTrans + if notrans { + transt = blas.Trans + } + + switch { + case left && notrans: + for i := 0; i < k; i += nb { + ib := min(nb, k-i) + impl.Dlarft(lapack.Forward, lapack.RowWise, m-i, ib, + a[i*lda+i:], lda, + tau[i:], + t, ldt) + impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m-i, n, ib, + a[i*lda+i:], lda, + t, ldt, + c[i*ldc:], ldc, + wrk, ldwrk) + } + + case left && !notrans: + for i := ((k - 1) / nb) * nb; i >= 0; i -= nb { + ib := min(nb, k-i) + impl.Dlarft(lapack.Forward, lapack.RowWise, m-i, ib, + a[i*lda+i:], lda, + tau[i:], + t, ldt) + impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m-i, n, ib, + a[i*lda+i:], lda, + t, ldt, + c[i*ldc:], ldc, + wrk, ldwrk) + } + + case !left && notrans: + for i := ((k - 1) / nb) * nb; i >= 0; i -= nb { + ib := min(nb, k-i) + impl.Dlarft(lapack.Forward, lapack.RowWise, n-i, ib, + a[i*lda+i:], lda, + tau[i:], + t, ldt) + impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m, n-i, ib, + a[i*lda+i:], lda, + t, ldt, + c[i:], ldc, + wrk, ldwrk) + } + + case !left && !notrans: + for i := 0; i < k; i += nb { + ib := min(nb, k-i) + impl.Dlarft(lapack.Forward, lapack.RowWise, n-i, ib, + a[i*lda+i:], lda, + tau[i:], + t, ldt) + impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m, n-i, ib, + a[i*lda+i:], lda, + t, ldt, + c[i:], ldc, + wrk, ldwrk) + } + } + work[0] = float64(lworkopt) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dormqr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dormqr.go new file mode 100644 index 0000000000..c1e5668be5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dormqr.go @@ -0,0 +1,180 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/lapack" +) + +// Dormqr multiplies an m×n matrix C by an orthogonal matrix Q as +// +// C = Q * C if side == blas.Left and trans == blas.NoTrans, +// C = Qᵀ * C if side == blas.Left and trans == blas.Trans, +// C = C * Q if side == blas.Right and trans == blas.NoTrans, +// C = C * Qᵀ if side == blas.Right and trans == blas.Trans, +// +// where Q is defined as the product of k elementary reflectors +// +// Q = H_0 * H_1 * ... * H_{k-1}. +// +// If side == blas.Left, A is an m×k matrix and 0 <= k <= m. +// If side == blas.Right, A is an n×k matrix and 0 <= k <= n. 
+// The ith column of A contains the vector which defines the elementary
+// reflector H_i and tau[i] contains its scalar factor. tau must have length k
+// and Dormqr will panic otherwise. Dgeqrf returns A and tau in the required
+// form.
+//
+// work must have length at least max(1,lwork), and lwork must be at least n if
+// side == blas.Left and at least m if side == blas.Right, otherwise Dormqr will
+// panic.
+//
+// work is temporary storage, and lwork specifies the usable memory length. At
+// minimum, lwork >= n if side == blas.Left and lwork >= m if side ==
+// blas.Right, and this function will panic otherwise. Larger values of lwork
+// will generally give better performance. On return, work[0] will contain the
+// optimal value of lwork.
+//
+// If lwork is -1, instead of performing Dormqr, the optimal workspace size will
+// be stored into work[0].
+func (impl Implementation) Dormqr(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
+	left := side == blas.Left
+	nq := n
+	nw := m
+	if left {
+		nq = m
+		nw = n
+	}
+	switch {
+	case !left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.NoTrans && trans != blas.Trans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case left && k > m:
+		panic(kGTM)
+	case !left && k > n:
+		panic(kGTN)
+	case lda < max(1, k):
+		panic(badLdA)
+	case ldc < max(1, n):
+		panic(badLdC)
+	case lwork < max(1, nw) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	if m == 0 || n == 0 || k == 0 {
+		work[0] = 1
+		return
+	}
+
+	const (
+		nbmax = 64
+		ldt   = nbmax
+		tsize = nbmax * ldt
+	)
+	opts := string(side) + string(trans)
+	nb := min(nbmax, impl.Ilaenv(1, "DORMQR", opts, m, n, k, -1))
+	lworkopt := max(1, nw)*nb + tsize
+	if lwork == -1 {
+		work[0] = float64(lworkopt)
+		return
+	}
+
+	switch {
+	case len(a) < (nq-1)*lda+k:
+		panic(shortA)
+	case len(tau) != k:
+		panic(badLenTau)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	}
+
+	nbmin := 2
+	if 1 < nb && nb < k {
+		if lwork < nw*nb+tsize {
+			nb = (lwork - tsize) / nw
+			nbmin = max(2, impl.Ilaenv(2, "DORMQR", opts, m, n, k, -1))
+		}
+	}
+
+	if nb < nbmin || k <= nb {
+		// Call unblocked code.
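+		// (Either the workspace caps nb below nbmin or k fits in a
+		// single block; the unblocked Dorm2r then performs the whole
+		// update.)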
+ impl.Dorm2r(side, trans, m, n, k, a, lda, tau, c, ldc, work) + work[0] = float64(lworkopt) + return + } + + var ( + ldwork = nb + notrans = trans == blas.NoTrans + ) + switch { + case left && notrans: + for i := ((k - 1) / nb) * nb; i >= 0; i -= nb { + ib := min(nb, k-i) + impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib, + a[i*lda+i:], lda, + tau[i:], + work[:tsize], ldt) + impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m-i, n, ib, + a[i*lda+i:], lda, + work[:tsize], ldt, + c[i*ldc:], ldc, + work[tsize:], ldwork) + } + + case left && !notrans: + for i := 0; i < k; i += nb { + ib := min(nb, k-i) + impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib, + a[i*lda+i:], lda, + tau[i:], + work[:tsize], ldt) + impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m-i, n, ib, + a[i*lda+i:], lda, + work[:tsize], ldt, + c[i*ldc:], ldc, + work[tsize:], ldwork) + } + + case !left && notrans: + for i := 0; i < k; i += nb { + ib := min(nb, k-i) + impl.Dlarft(lapack.Forward, lapack.ColumnWise, n-i, ib, + a[i*lda+i:], lda, + tau[i:], + work[:tsize], ldt) + impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m, n-i, ib, + a[i*lda+i:], lda, + work[:tsize], ldt, + c[i:], ldc, + work[tsize:], ldwork) + } + + case !left && !notrans: + for i := ((k - 1) / nb) * nb; i >= 0; i -= nb { + ib := min(nb, k-i) + impl.Dlarft(lapack.Forward, lapack.ColumnWise, n-i, ib, + a[i*lda+i:], lda, + tau[i:], + work[:tsize], ldt) + impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m, n-i, ib, + a[i*lda+i:], lda, + work[:tsize], ldt, + c[i:], ldc, + work[tsize:], ldwork) + } + } + work[0] = float64(lworkopt) +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dormr2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dormr2.go new file mode 100644 index 0000000000..59d4d4f17e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dormr2.go @@ -0,0 +1,105 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dormr2 multiplies a general matrix C by an orthogonal matrix from a RQ factorization +// determined by Dgerqf. +// +// C = Q * C if side == blas.Left and trans == blas.NoTrans +// C = Qᵀ * C if side == blas.Left and trans == blas.Trans +// C = C * Q if side == blas.Right and trans == blas.NoTrans +// C = C * Qᵀ if side == blas.Right and trans == blas.Trans +// +// If side == blas.Left, a is a matrix of size k×m, and if side == blas.Right +// a is of size k×n. +// +// tau contains the Householder factors and is of length at least k and this function +// will panic otherwise. +// +// work is temporary storage of length at least n if side == blas.Left +// and at least m if side == blas.Right and this function will panic otherwise. +// +// Dormr2 is an internal routine. It is exported for testing purposes. 
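+//
+// An illustrative pairing (assumed here, not upstream documentation): for an
+// m×n matrix factored as A = R * Q by Dgerqf with m <= n (so k = m), Q can be
+// applied from the left to an n×nc matrix C (nc, c and ldc being the caller's)
+// with
+//
+//	impl.Dgerqf(m, n, a, lda, tau, work, lwork)
+//	impl.Dormr2(blas.Left, blas.NoTrans, n, nc, m, a, lda, tau, c, ldc, work2)
+//
+// where len(work2) >= nc.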
+func (impl Implementation) Dormr2(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) { + left := side == blas.Left + nq := n + nw := m + if left { + nq = m + nw = n + } + switch { + case !left && side != blas.Right: + panic(badSide) + case trans != blas.NoTrans && trans != blas.Trans: + panic(badTrans) + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case k < 0: + panic(kLT0) + case left && k > m: + panic(kGTM) + case !left && k > n: + panic(kGTN) + case lda < max(1, nq): + panic(badLdA) + case ldc < max(1, n): + panic(badLdC) + } + + // Quick return if possible. + if m == 0 || n == 0 || k == 0 { + return + } + + switch { + case len(a) < (k-1)*lda+nq: + panic(shortA) + case len(tau) < k: + panic(shortTau) + case len(c) < (m-1)*ldc+n: + panic(shortC) + case len(work) < nw: + panic(shortWork) + } + + if left { + if trans == blas.NoTrans { + for i := k - 1; i >= 0; i-- { + aii := a[i*lda+(m-k+i)] + a[i*lda+(m-k+i)] = 1 + impl.Dlarf(side, m-k+i+1, n, a[i*lda:], 1, tau[i], c, ldc, work) + a[i*lda+(m-k+i)] = aii + } + return + } + for i := 0; i < k; i++ { + aii := a[i*lda+(m-k+i)] + a[i*lda+(m-k+i)] = 1 + impl.Dlarf(side, m-k+i+1, n, a[i*lda:], 1, tau[i], c, ldc, work) + a[i*lda+(m-k+i)] = aii + } + return + } + if trans == blas.NoTrans { + for i := 0; i < k; i++ { + aii := a[i*lda+(n-k+i)] + a[i*lda+(n-k+i)] = 1 + impl.Dlarf(side, m, n-k+i+1, a[i*lda:], 1, tau[i], c, ldc, work) + a[i*lda+(n-k+i)] = aii + } + return + } + for i := k - 1; i >= 0; i-- { + aii := a[i*lda+(n-k+i)] + a[i*lda+(n-k+i)] = 1 + impl.Dlarf(side, m, n-k+i+1, a[i*lda:], 1, tau[i], c, ldc, work) + a[i*lda+(n-k+i)] = aii + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpbcon.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpbcon.go new file mode 100644 index 0000000000..0ed63e62dd --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpbcon.go @@ -0,0 +1,111 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dpbcon returns an estimate of the reciprocal of the condition number (in the +// 1-norm) of an n×n symmetric positive definite band matrix using the Cholesky +// factorization +// +// A = Uᵀ*U if uplo == blas.Upper +// A = L*Lᵀ if uplo == blas.Lower +// +// computed by Dpbtrf. The estimate is obtained for norm(inv(A)), and the +// reciprocal of the condition number is computed as +// +// rcond = 1 / (anorm * norm(inv(A))). +// +// The length of work must be at least 3*n and the length of iwork must be at +// least n. +func (impl Implementation) Dpbcon(uplo blas.Uplo, n, kd int, ab []float64, ldab int, anorm float64, work []float64, iwork []int) (rcond float64) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case kd < 0: + panic(kdLT0) + case ldab < kd+1: + panic(badLdA) + case anorm < 0: + panic(badNorm) + } + + // Quick return if possible. + if n == 0 { + return 1 + } + + switch { + case len(ab) < (n-1)*ldab+kd+1: + panic(shortAB) + case len(work) < 3*n: + panic(shortWork) + case len(iwork) < n: + panic(shortIWork) + } + + // Quick return if possible. + if anorm == 0 { + return 0 + } + + const smlnum = dlamchS + + var ( + ainvnm float64 + kase int + isave [3]int + normin bool + + // Denote work slices. 
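+		// (x is the vector iterated on by the norm estimator Dlacn2,
+		// v is the estimator's workspace, and cnorm caches column
+		// norms for the scaled triangular solves in Dlatbs.)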
+		x     = work[:n]
+		v     = work[n : 2*n]
+		cnorm = work[2*n : 3*n]
+	)
+	// Estimate the 1-norm of the inverse.
+	bi := blas64.Implementation()
+	for {
+		ainvnm, kase = impl.Dlacn2(n, v, x, iwork, ainvnm, kase, &isave)
+		if kase == 0 {
+			break
+		}
+		var op1, op2 blas.Transpose
+		if uplo == blas.Upper {
+			// Multiply x by inv(Uᵀ),
+			op1 = blas.Trans
+			// then by inv(U).
+			op2 = blas.NoTrans
+		} else {
+			// Multiply x by inv(L),
+			op1 = blas.NoTrans
+			// then by inv(Lᵀ).
+			op2 = blas.Trans
+		}
+		scaleL := impl.Dlatbs(uplo, op1, blas.NonUnit, normin, n, kd, ab, ldab, x, cnorm)
+		normin = true
+		scaleU := impl.Dlatbs(uplo, op2, blas.NonUnit, normin, n, kd, ab, ldab, x, cnorm)
+		// Multiply x by 1/scale if doing so will not cause overflow.
+		scale := scaleL * scaleU
+		if scale != 1 {
+			ix := bi.Idamax(n, x, 1)
+			if scale < math.Abs(x[ix])*smlnum || scale == 0 {
+				return 0
+			}
+			impl.Drscl(n, scale, x, 1)
+		}
+	}
+	if ainvnm == 0 {
+		return 0
+	}
+	// Return the estimate of the reciprocal condition number.
+	return (1 / ainvnm) / anorm
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpbtf2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpbtf2.go
new file mode 100644
index 0000000000..8150e56802
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpbtf2.go
@@ -0,0 +1,114 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dpbtf2 computes the Cholesky factorization of a symmetric positive banded
+// matrix ab. The matrix ab is n×n with kd diagonal bands. The Cholesky
+// factorization computed is
+//
+//	A = Uᵀ * U  if ul == blas.Upper
+//	A = L * Lᵀ  if ul == blas.Lower
+//
+// ul also specifies the storage of ab. If ul == blas.Upper, then
+// ab is stored as an upper-triangular banded matrix with kd super-diagonals,
+// and if ul == blas.Lower, ab is stored as a lower-triangular banded matrix
+// with kd sub-diagonals. On exit, the banded matrix U or L is stored in-place
+// into ab depending on the value of ul. Dpbtf2 returns whether the factorization
+// was successfully completed.
+//
+// The band storage scheme is illustrated below when n = 6, and kd = 2.
+// The resulting Cholesky decomposition is stored in the same elements as the
+// input band matrix (a11 becomes u11 or l11, etc.).
+//
+//	ul = blas.Upper
+//	a11 a12 a13
+//	a22 a23 a24
+//	a33 a34 a35
+//	a44 a45 a46
+//	a55 a56  *
+//	a66  *   *
+//
+//	ul = blas.Lower
+//	 *   *  a11
+//	 *  a21 a22
+//	a31 a32 a33
+//	a42 a43 a44
+//	a53 a54 a55
+//	a64 a65 a66
+//
+// Dpbtf2 is the unblocked version of the algorithm, see Dpbtrf for the blocked
+// version.
+//
+// Dpbtf2 is an internal routine, exported for testing purposes.
+func (Implementation) Dpbtf2(uplo blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) {
+	switch {
+	case uplo != blas.Upper && uplo != blas.Lower:
+		panic(badUplo)
+	case n < 0:
+		panic(nLT0)
+	case kd < 0:
+		panic(kdLT0)
+	case ldab < kd+1:
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		return true
+	}
+
+	if len(ab) < (n-1)*ldab+kd+1 {
+		panic(shortAB)
+	}
+
+	bi := blas64.Implementation()
+
+	kld := max(1, ldab-1)
+	if uplo == blas.Upper {
+		// Compute the Cholesky factorization A = Uᵀ * U.
+		for j := 0; j < n; j++ {
+			// Compute U(j,j) and test for non-positive-definiteness.
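+			// In row-major upper band storage the diagonal element
+			// A[j,j] is the first entry of row j of ab, i.e. ab[j*ldab].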
+ ajj := ab[j*ldab] + if ajj <= 0 { + return false + } + ajj = math.Sqrt(ajj) + ab[j*ldab] = ajj + // Compute elements j+1:j+kn of row j and update the trailing submatrix + // within the band. + kn := min(kd, n-j-1) + if kn > 0 { + bi.Dscal(kn, 1/ajj, ab[j*ldab+1:], 1) + bi.Dsyr(blas.Upper, kn, -1, ab[j*ldab+1:], 1, ab[(j+1)*ldab:], kld) + } + } + return true + } + // Compute the Cholesky factorization A = L * Lᵀ. + for j := 0; j < n; j++ { + // Compute L(j,j) and test for non-positive-definiteness. + ajj := ab[j*ldab+kd] + if ajj <= 0 { + return false + } + ajj = math.Sqrt(ajj) + ab[j*ldab+kd] = ajj + // Compute elements j+1:j+kn of column j and update the trailing submatrix + // within the band. + kn := min(kd, n-j-1) + if kn > 0 { + bi.Dscal(kn, 1/ajj, ab[(j+1)*ldab+kd-1:], kld) + bi.Dsyr(blas.Lower, kn, -1, ab[(j+1)*ldab+kd-1:], kld, ab[(j+1)*ldab+kd:], kld) + } + } + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpbtrf.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpbtrf.go new file mode 100644 index 0000000000..12cdfc0fab --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpbtrf.go @@ -0,0 +1,216 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dpbtrf computes the Cholesky factorization of an n×n symmetric positive +// definite band matrix +// +// A = Uᵀ * U if uplo == blas.Upper +// A = L * Lᵀ if uplo == blas.Lower +// +// where U is an upper triangular band matrix and L is lower triangular. kd is +// the number of super- or sub-diagonals of A. +// +// The band storage scheme is illustrated below when n = 6 and kd = 2. Elements +// marked * are not used by the function. +// +// uplo == blas.Upper +// On entry: On return: +// a00 a01 a02 u00 u01 u02 +// a11 a12 a13 u11 u12 u13 +// a22 a23 a24 u22 u23 u24 +// a33 a34 a35 u33 u34 u35 +// a44 a45 * u44 u45 * +// a55 * * u55 * * +// +// uplo == blas.Lower +// On entry: On return: +// * * a00 * * l00 +// * a10 a11 * l10 l11 +// a20 a21 a22 l20 l21 l22 +// a31 a32 a33 l31 l32 l33 +// a42 a43 a44 l42 l43 l44 +// a53 a54 a55 l53 l54 l55 +func (impl Implementation) Dpbtrf(uplo blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) { + const nbmax = 32 + + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case kd < 0: + panic(kdLT0) + case ldab < kd+1: + panic(badLdA) + } + + // Quick return if possible. + if n == 0 { + return true + } + + if len(ab) < (n-1)*ldab+kd+1 { + panic(shortAB) + } + + opts := string(blas.Upper) + if uplo == blas.Lower { + opts = string(blas.Lower) + } + nb := impl.Ilaenv(1, "DPBTRF", opts, n, kd, -1, -1) + // The block size must not exceed the semi-bandwidth kd, and must not + // exceed the limit set by the size of the local array work. + nb = min(nb, nbmax) + + if nb <= 1 || kd < nb { + // Use unblocked code. + return impl.Dpbtf2(uplo, n, kd, ab, ldab) + } + + // Use blocked code. + ldwork := nb + work := make([]float64, nb*ldwork) + bi := blas64.Implementation() + if uplo == blas.Upper { + // Compute the Cholesky factorization of a symmetric band + // matrix, given the upper triangle of the matrix in band + // storage. + + // Process the band matrix one diagonal block at a time. + for i := 0; i < n; i += nb { + ib := min(nb, n-i) + // Factorize the diagonal block. 
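+			// Viewed with leading dimension ldab-1, the ib×ib diagonal
+			// block of the band becomes a dense triangular matrix, so
+			// the unblocked Dpotf2 can factor it in place.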
+			ok := impl.Dpotf2(uplo, ib, ab[i*ldab:], ldab-1)
+			if !ok {
+				return false
+			}
+			if i+ib >= n {
+				continue
+			}
+			// Update the relevant part of the trailing submatrix.
+			// If A11 denotes the diagonal block which has just been
+			// factorized, then we need to update the remaining
+			// blocks in the diagram:
+			//
+			//	A11 A12 A13
+			//	    A22 A23
+			//	        A33
+			//
+			// The numbers of rows and columns in the partitioning
+			// are ib, i2, i3 respectively. The blocks A12, A22 and
+			// A23 are empty if ib = kd. The upper triangle of A13
+			// lies outside the band.
+			i2 := min(kd-ib, n-i-ib)
+			if i2 > 0 {
+				// Update A12.
+				bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i2,
+					1, ab[i*ldab:], ldab-1, ab[i*ldab+ib:], ldab-1)
+				// Update A22.
+				bi.Dsyrk(blas.Upper, blas.Trans, i2, ib,
+					-1, ab[i*ldab+ib:], ldab-1, 1, ab[(i+ib)*ldab:], ldab-1)
+			}
+			i3 := min(ib, n-i-kd)
+			if i3 > 0 {
+				// Copy the lower triangle of A13 into the work array.
+				for ii := 0; ii < ib; ii++ {
+					for jj := 0; jj <= min(ii, i3-1); jj++ {
+						work[ii*ldwork+jj] = ab[(i+ii)*ldab+kd-ii+jj]
+					}
+				}
+				// Update A13 (in the work array).
+				bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i3,
+					1, ab[i*ldab:], ldab-1, work, ldwork)
+				// Update A23.
+				if i2 > 0 {
+					bi.Dgemm(blas.Trans, blas.NoTrans, i2, i3, ib,
+						-1, ab[i*ldab+ib:], ldab-1, work, ldwork,
+						1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
+				}
+				// Update A33.
+				bi.Dsyrk(blas.Upper, blas.Trans, i3, ib,
+					-1, work, ldwork, 1, ab[(i+kd)*ldab:], ldab-1)
+				// Copy the lower triangle of A13 back into place.
+				for ii := 0; ii < ib; ii++ {
+					for jj := 0; jj <= min(ii, i3-1); jj++ {
+						ab[(i+ii)*ldab+kd-ii+jj] = work[ii*ldwork+jj]
+					}
+				}
+			}
+		}
+	} else {
+		// Compute the Cholesky factorization of a symmetric band
+		// matrix, given the lower triangle of the matrix in band
+		// storage.
+
+		// Process the band matrix one diagonal block at a time.
+		for i := 0; i < n; i += nb {
+			ib := min(nb, n-i)
+			// Factorize the diagonal block.
+			ok := impl.Dpotf2(uplo, ib, ab[i*ldab+kd:], ldab-1)
+			if !ok {
+				return false
+			}
+			if i+ib >= n {
+				continue
+			}
+			// Update the relevant part of the trailing submatrix.
+			// If A11 denotes the diagonal block which has just been
+			// factorized, then we need to update the remaining
+			// blocks in the diagram:
+			//
+			//	A11
+			//	A21 A22
+			//	A31 A32 A33
+			//
+			// The numbers of rows and columns in the partitioning
+			// are ib, i2, i3 respectively. The blocks A21, A22 and
+			// A32 are empty if ib = kd. The lower triangle of A31
+			// lies outside the band.
+			i2 := min(kd-ib, n-i-ib)
+			if i2 > 0 {
+				// Update A21.
+				bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i2, ib,
+					1, ab[i*ldab+kd:], ldab-1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
+				// Update A22.
+				bi.Dsyrk(blas.Lower, blas.NoTrans, i2, ib,
+					-1, ab[(i+ib)*ldab+kd-ib:], ldab-1, 1, ab[(i+ib)*ldab+kd:], ldab-1)
+			}
+			i3 := min(ib, n-i-kd)
+			if i3 > 0 {
+				// Copy the upper triangle of A31 into the work array.
+				for ii := 0; ii < i3; ii++ {
+					for jj := ii; jj < ib; jj++ {
+						work[ii*ldwork+jj] = ab[(ii+i+kd)*ldab+jj-ii]
+					}
+				}
+				// Update A31 (in the work array).
+				bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i3, ib,
+					1, ab[i*ldab+kd:], ldab-1, work, ldwork)
+				// Update A32.
+				if i2 > 0 {
+					bi.Dgemm(blas.NoTrans, blas.Trans, i3, i2, ib,
+						-1, work, ldwork, ab[(i+ib)*ldab+kd-ib:], ldab-1,
+						1, ab[(i+kd)*ldab+ib:], ldab-1)
+				}
+				// Update A33.
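+				// A33 receives a symmetric rank-ib update from the
+				// solved A31 panel held in work.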
+ bi.Dsyrk(blas.Lower, blas.NoTrans, i3, ib, + -1, work, ldwork, 1, ab[(i+kd)*ldab+kd:], ldab-1) + // Copy the upper triangle of A31 back into place. + for ii := 0; ii < i3; ii++ { + for jj := ii; jj < ib; jj++ { + ab[(ii+i+kd)*ldab+jj-ii] = work[ii*ldwork+jj] + } + } + } + } + } + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpbtrs.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpbtrs.go new file mode 100644 index 0000000000..97c9ada00b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpbtrs.go @@ -0,0 +1,69 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dpbtrs solves a system of linear equations A*X = B with an n×n symmetric +// positive definite band matrix A using the Cholesky factorization +// +// A = Uᵀ * U if uplo == blas.Upper +// A = L * Lᵀ if uplo == blas.Lower +// +// computed by Dpbtrf. kd is the number of super- or sub-diagonals of A. See the +// documentation for Dpbtrf for a description of the band storage format of A. +// +// On entry, b contains the n×nrhs right hand side matrix B. On return, it is +// overwritten with the solution matrix X. +func (Implementation) Dpbtrs(uplo blas.Uplo, n, kd, nrhs int, ab []float64, ldab int, b []float64, ldb int) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case kd < 0: + panic(kdLT0) + case nrhs < 0: + panic(nrhsLT0) + case ldab < kd+1: + panic(badLdA) + case ldb < max(1, nrhs): + panic(badLdB) + } + + // Quick return if possible. + if n == 0 || nrhs == 0 { + return + } + + if len(ab) < (n-1)*ldab+kd+1 { + panic(shortAB) + } + if len(b) < (n-1)*ldb+nrhs { + panic(shortB) + } + + bi := blas64.Implementation() + if uplo == blas.Upper { + // Solve A*X = B where A = Uᵀ*U. + for j := 0; j < nrhs; j++ { + // Solve Uᵀ*Y = B, overwriting B with Y. + bi.Dtbsv(blas.Upper, blas.Trans, blas.NonUnit, n, kd, ab, ldab, b[j:], ldb) + // Solve U*X = Y, overwriting Y with X. + bi.Dtbsv(blas.Upper, blas.NoTrans, blas.NonUnit, n, kd, ab, ldab, b[j:], ldb) + } + } else { + // Solve A*X = B where A = L*Lᵀ. + for j := 0; j < nrhs; j++ { + // Solve L*Y = B, overwriting B with Y. + bi.Dtbsv(blas.Lower, blas.NoTrans, blas.NonUnit, n, kd, ab, ldab, b[j:], ldb) + // Solve Lᵀ*X = Y, overwriting Y with X. + bi.Dtbsv(blas.Lower, blas.Trans, blas.NonUnit, n, kd, ab, ldab, b[j:], ldb) + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpocon.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpocon.go new file mode 100644 index 0000000000..7af4c18728 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpocon.go @@ -0,0 +1,90 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dpocon estimates the reciprocal of the condition number of a positive-definite +// matrix A given the Cholesky decomposition of A. The condition number computed +// is based on the 1-norm and the ∞-norm. +// +// anorm is the 1-norm and the ∞-norm of the original matrix A. +// +// work is a temporary data slice of length at least 3*n and Dpocon will panic otherwise. 
+// +// iwork is a temporary data slice of length at least n and Dpocon will panic otherwise. +func (impl Implementation) Dpocon(uplo blas.Uplo, n int, a []float64, lda int, anorm float64, work []float64, iwork []int) float64 { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case anorm < 0: + panic(negANorm) + } + + // Quick return if possible. + if n == 0 { + return 1 + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(work) < 3*n: + panic(shortWork) + case len(iwork) < n: + panic(shortIWork) + } + + if anorm == 0 { + return 0 + } + + bi := blas64.Implementation() + + var ( + smlnum = dlamchS + rcond float64 + sl, su float64 + normin bool + ainvnm float64 + kase int + isave [3]int + ) + for { + ainvnm, kase = impl.Dlacn2(n, work[n:], work, iwork, ainvnm, kase, &isave) + if kase == 0 { + if ainvnm != 0 { + rcond = (1 / ainvnm) / anorm + } + return rcond + } + if uplo == blas.Upper { + sl = impl.Dlatrs(blas.Upper, blas.Trans, blas.NonUnit, normin, n, a, lda, work, work[2*n:]) + normin = true + su = impl.Dlatrs(blas.Upper, blas.NoTrans, blas.NonUnit, normin, n, a, lda, work, work[2*n:]) + } else { + sl = impl.Dlatrs(blas.Lower, blas.NoTrans, blas.NonUnit, normin, n, a, lda, work, work[2*n:]) + normin = true + su = impl.Dlatrs(blas.Lower, blas.Trans, blas.NonUnit, normin, n, a, lda, work, work[2*n:]) + } + scale := sl * su + if scale != 1 { + ix := bi.Idamax(n, work, 1) + if scale == 0 || scale < math.Abs(work[ix])*smlnum { + return rcond + } + impl.Drscl(n, scale, work, 1) + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpotf2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpotf2.go new file mode 100644 index 0000000000..83411f1cf1 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpotf2.go @@ -0,0 +1,82 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dpotf2 computes the Cholesky decomposition of the symmetric positive definite +// matrix a. If ul == blas.Upper, then a is stored as an upper-triangular matrix, +// and a = Uᵀ U is stored in place into a. If ul == blas.Lower, then a = L Lᵀ +// is computed and stored in-place into a. If a is not positive definite, false +// is returned. This is the unblocked version of the algorithm. +// +// Dpotf2 is an internal routine. It is exported for testing purposes. +func (Implementation) Dpotf2(ul blas.Uplo, n int, a []float64, lda int) (ok bool) { + switch { + case ul != blas.Upper && ul != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. 
+ if n == 0 { + return true + } + + if len(a) < (n-1)*lda+n { + panic(shortA) + } + + bi := blas64.Implementation() + + if ul == blas.Upper { + for j := 0; j < n; j++ { + ajj := a[j*lda+j] + if j != 0 { + ajj -= bi.Ddot(j, a[j:], lda, a[j:], lda) + } + if ajj <= 0 || math.IsNaN(ajj) { + a[j*lda+j] = ajj + return false + } + ajj = math.Sqrt(ajj) + a[j*lda+j] = ajj + if j < n-1 { + bi.Dgemv(blas.Trans, j, n-j-1, + -1, a[j+1:], lda, a[j:], lda, + 1, a[j*lda+j+1:], 1) + bi.Dscal(n-j-1, 1/ajj, a[j*lda+j+1:], 1) + } + } + return true + } + for j := 0; j < n; j++ { + ajj := a[j*lda+j] + if j != 0 { + ajj -= bi.Ddot(j, a[j*lda:], 1, a[j*lda:], 1) + } + if ajj <= 0 || math.IsNaN(ajj) { + a[j*lda+j] = ajj + return false + } + ajj = math.Sqrt(ajj) + a[j*lda+j] = ajj + if j < n-1 { + bi.Dgemv(blas.NoTrans, n-j-1, j, + -1, a[(j+1)*lda:], lda, a[j*lda:], 1, + 1, a[(j+1)*lda+j:], lda) + bi.Dscal(n-j-1, 1/ajj, a[(j+1)*lda+j:], lda) + } + } + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpotrf.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpotrf.go new file mode 100644 index 0000000000..7c81680166 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpotrf.go @@ -0,0 +1,81 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dpotrf computes the Cholesky decomposition of the symmetric positive definite +// matrix a. If ul == blas.Upper, then a is stored as an upper-triangular matrix, +// and a = Uᵀ U is stored in place into a. If ul == blas.Lower, then a = L Lᵀ +// is computed and stored in-place into a. If a is not positive definite, false +// is returned. This is the blocked version of the algorithm. +func (impl Implementation) Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok bool) { + switch { + case ul != blas.Upper && ul != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. 
+ if n == 0 { + return true + } + + if len(a) < (n-1)*lda+n { + panic(shortA) + } + + nb := impl.Ilaenv(1, "DPOTRF", string(ul), n, -1, -1, -1) + if nb <= 1 || n <= nb { + return impl.Dpotf2(ul, n, a, lda) + } + bi := blas64.Implementation() + if ul == blas.Upper { + for j := 0; j < n; j += nb { + jb := min(nb, n-j) + bi.Dsyrk(blas.Upper, blas.Trans, jb, j, + -1, a[j:], lda, + 1, a[j*lda+j:], lda) + ok = impl.Dpotf2(blas.Upper, jb, a[j*lda+j:], lda) + if !ok { + return ok + } + if j+jb < n { + bi.Dgemm(blas.Trans, blas.NoTrans, jb, n-j-jb, j, + -1, a[j:], lda, a[j+jb:], lda, + 1, a[j*lda+j+jb:], lda) + bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, jb, n-j-jb, + 1, a[j*lda+j:], lda, + a[j*lda+j+jb:], lda) + } + } + return true + } + for j := 0; j < n; j += nb { + jb := min(nb, n-j) + bi.Dsyrk(blas.Lower, blas.NoTrans, jb, j, + -1, a[j*lda:], lda, + 1, a[j*lda+j:], lda) + ok := impl.Dpotf2(blas.Lower, jb, a[j*lda+j:], lda) + if !ok { + return ok + } + if j+jb < n { + bi.Dgemm(blas.NoTrans, blas.Trans, n-j-jb, jb, j, + -1, a[(j+jb)*lda:], lda, a[j*lda:], lda, + 1, a[(j+jb)*lda+j:], lda) + bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, n-j-jb, jb, + 1, a[j*lda+j:], lda, + a[(j+jb)*lda+j:], lda) + } + } + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpotri.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpotri.go new file mode 100644 index 0000000000..6fa981c130 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpotri.go @@ -0,0 +1,44 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/blas" + +// Dpotri computes the inverse of a real symmetric positive definite matrix A +// using its Cholesky factorization. +// +// On entry, a contains the triangular factor U or L from the Cholesky +// factorization A = Uᵀ*U or A = L*Lᵀ, as computed by Dpotrf. +// On return, a contains the upper or lower triangle of the (symmetric) +// inverse of A, overwriting the input factor U or L. +func (impl Implementation) Dpotri(uplo blas.Uplo, n int, a []float64, lda int) (ok bool) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. + if n == 0 { + return true + } + + if len(a) < (n-1)*lda+n { + panic(shortA) + } + + // Invert the triangular Cholesky factor U or L. + ok = impl.Dtrtri(uplo, blas.NonUnit, n, a, lda) + if !ok { + return false + } + + // Form inv(U)*inv(U)ᵀ or inv(L)ᵀ*inv(L). + impl.Dlauum(uplo, n, a, lda) + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpotrs.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpotrs.go new file mode 100644 index 0000000000..77d070001a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpotrs.go @@ -0,0 +1,64 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dpotrs solves a system of n linear equations A*X = B where A is an n×n +// symmetric positive definite matrix and B is an n×nrhs matrix. The matrix A is +// represented by its Cholesky factorization +// +// A = Uᵀ*U if uplo == blas.Upper +// A = L*Lᵀ if uplo == blas.Lower +// +// as computed by Dpotrf. 
On entry, B contains the right-hand side matrix B, on +// return it contains the solution matrix X. +func (Implementation) Dpotrs(uplo blas.Uplo, n, nrhs int, a []float64, lda int, b []float64, ldb int) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case nrhs < 0: + panic(nrhsLT0) + case lda < max(1, n): + panic(badLdA) + case ldb < max(1, nrhs): + panic(badLdB) + } + + // Quick return if possible. + if n == 0 || nrhs == 0 { + return + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(b) < (n-1)*ldb+nrhs: + panic(shortB) + } + + bi := blas64.Implementation() + + if uplo == blas.Upper { + // Solve Uᵀ * U * X = B where U is stored in the upper triangle of A. + + // Solve Uᵀ * X = B, overwriting B with X. + bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, n, nrhs, 1, a, lda, b, ldb) + // Solve U * X = B, overwriting B with X. + bi.Dtrsm(blas.Left, blas.Upper, blas.NoTrans, blas.NonUnit, n, nrhs, 1, a, lda, b, ldb) + } else { + // Solve L * Lᵀ * X = B where L is stored in the lower triangle of A. + + // Solve L * X = B, overwriting B with X. + bi.Dtrsm(blas.Left, blas.Lower, blas.NoTrans, blas.NonUnit, n, nrhs, 1, a, lda, b, ldb) + // Solve Lᵀ * X = B, overwriting B with X. + bi.Dtrsm(blas.Left, blas.Lower, blas.Trans, blas.NonUnit, n, nrhs, 1, a, lda, b, ldb) + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpstf2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpstf2.go new file mode 100644 index 0000000000..79b607ddc9 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpstf2.go @@ -0,0 +1,202 @@ +// Copyright ©2021 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dpstf2 computes the Cholesky factorization with complete pivoting of an n×n +// symmetric positive semidefinite matrix A. +// +// The factorization has the form +// +// Pᵀ * A * P = Uᵀ * U , if uplo = blas.Upper, +// Pᵀ * A * P = L * Lᵀ, if uplo = blas.Lower, +// +// where U is an upper triangular matrix, L is lower triangular, and P is a +// permutation matrix. +// +// tol is a user-defined tolerance. The algorithm terminates if the pivot is +// less than or equal to tol. If tol is negative, then n*eps*max(A[k,k]) will be +// used instead. +// +// On return, A contains the factor U or L from the Cholesky factorization and +// piv contains P stored such that P[piv[k],k] = 1. +// +// Dpstf2 returns the computed rank of A and whether the factorization can be +// used to solve a system. Dpstf2 does not attempt to check that A is positive +// semi-definite, so if ok is false, the matrix A is either rank deficient or is +// not positive semidefinite. +// +// The length of piv must be n and the length of work must be at least 2*n, +// otherwise Dpstf2 will panic. +// +// Dpstf2 is an internal routine. It is exported for testing purposes. +func (Implementation) Dpstf2(uplo blas.Uplo, n int, a []float64, lda int, piv []int, tol float64, work []float64) (rank int, ok bool) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. 
+ if n == 0 { + return 0, true + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(piv) != n: + panic(badLenPiv) + case len(work) < 2*n: + panic(shortWork) + } + + // Initialize piv. + for i := range piv[:n] { + piv[i] = i + } + + // Compute the first pivot. + pvt := 0 + ajj := a[0] + for i := 1; i < n; i++ { + aii := a[i*lda+i] + if aii > ajj { + pvt = i + ajj = aii + } + } + if ajj <= 0 || math.IsNaN(ajj) { + return 0, false + } + + // Compute stopping value if not supplied. + dstop := tol + if dstop < 0 { + dstop = float64(n) * dlamchE * ajj + } + + // Set first half of work to zero, holds dot products. + dots := work[:n] + for i := range dots { + dots[i] = 0 + } + work2 := work[n : 2*n] + + bi := blas64.Implementation() + if uplo == blas.Upper { + // Compute the Cholesky factorization Pᵀ * A * P = Uᵀ * U. + for j := 0; j < n; j++ { + // Update dot products and compute possible pivots which are stored + // in the second half of work. + for i := j; i < n; i++ { + if j > 0 { + tmp := a[(j-1)*lda+i] + dots[i] += tmp * tmp + } + work2[i] = a[i*lda+i] - dots[i] + } + if j > 0 { + // Find the pivot. + pvt = j + ajj = work2[pvt] + for k := j + 1; k < n; k++ { + wk := work2[k] + if wk > ajj { + pvt = k + ajj = wk + } + } + // Test for exit. + if ajj <= dstop || math.IsNaN(ajj) { + a[j*lda+j] = ajj + return j, false + } + } + if j != pvt { + // Swap pivot rows and columns. + a[pvt*lda+pvt] = a[j*lda+j] + bi.Dswap(j, a[j:], lda, a[pvt:], lda) + if pvt < n-1 { + bi.Dswap(n-pvt-1, a[j*lda+(pvt+1):], 1, a[pvt*lda+(pvt+1):], 1) + } + bi.Dswap(pvt-j-1, a[j*lda+(j+1):], 1, a[(j+1)*lda+pvt:], lda) + // Swap dot products and piv. + dots[j], dots[pvt] = dots[pvt], dots[j] + piv[j], piv[pvt] = piv[pvt], piv[j] + } + ajj = math.Sqrt(ajj) + a[j*lda+j] = ajj + // Compute elements j+1:n of row j. + if j < n-1 { + bi.Dgemv(blas.Trans, j, n-j-1, + -1, a[j+1:], lda, a[j:], lda, + 1, a[j*lda+j+1:], 1) + bi.Dscal(n-j-1, 1/ajj, a[j*lda+j+1:], 1) + } + } + } else { + // Compute the Cholesky factorization Pᵀ * A * P = L * Lᵀ. + for j := 0; j < n; j++ { + // Update dot products and compute possible pivots which are stored + // in the second half of work. + for i := j; i < n; i++ { + if j > 0 { + tmp := a[i*lda+(j-1)] + dots[i] += tmp * tmp + } + work2[i] = a[i*lda+i] - dots[i] + } + if j > 0 { + // Find the pivot. + pvt = j + ajj = work2[pvt] + for k := j + 1; k < n; k++ { + wk := work2[k] + if wk > ajj { + pvt = k + ajj = wk + } + } + // Test for exit. + if ajj <= dstop || math.IsNaN(ajj) { + a[j*lda+j] = ajj + return j, false + } + } + if j != pvt { + // Swap pivot rows and columns. + a[pvt*lda+pvt] = a[j*lda+j] + bi.Dswap(j, a[j*lda:], 1, a[pvt*lda:], 1) + if pvt < n-1 { + bi.Dswap(n-pvt-1, a[(pvt+1)*lda+j:], lda, a[(pvt+1)*lda+pvt:], lda) + } + bi.Dswap(pvt-j-1, a[(j+1)*lda+j:], lda, a[pvt*lda+(j+1):], 1) + // Swap dot products and piv. + dots[j], dots[pvt] = dots[pvt], dots[j] + piv[j], piv[pvt] = piv[pvt], piv[j] + } + ajj = math.Sqrt(ajj) + a[j*lda+j] = ajj + // Compute elements j+1:n of column j. + if j < n-1 { + bi.Dgemv(blas.NoTrans, n-j-1, j, + -1, a[(j+1)*lda:], lda, a[j*lda:], 1, + 1, a[(j+1)*lda+j:], lda) + bi.Dscal(n-j-1, 1/ajj, a[(j+1)*lda+j:], lda) + } + } + } + return n, true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpstrf.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpstrf.go new file mode 100644 index 0000000000..46a2fd4b77 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpstrf.go @@ -0,0 +1,233 @@ +// Copyright ©2021 The Gonum Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dpstrf computes the Cholesky factorization with complete pivoting of an n×n +// symmetric positive semidefinite matrix A. +// +// The factorization has the form +// +// Pᵀ * A * P = Uᵀ * U , if uplo = blas.Upper, +// Pᵀ * A * P = L * Lᵀ, if uplo = blas.Lower, +// +// where U is an upper triangular matrix, L is lower triangular, and P is a +// permutation matrix. +// +// tol is a user-defined tolerance. The algorithm terminates if the pivot is +// less than or equal to tol. If tol is negative, then n*eps*max(A[k,k]) will be +// used instead. +// +// On return, A contains the factor U or L from the Cholesky factorization and +// piv contains P stored such that P[piv[k],k] = 1. +// +// Dpstrf returns the computed rank of A and whether the factorization can be +// used to solve a system. Dpstrf does not attempt to check that A is positive +// semi-definite, so if ok is false, the matrix A is either rank deficient or is +// not positive semidefinite. +// +// The length of piv must be n and the length of work must be at least 2*n, +// otherwise Dpstrf will panic. +// +// Dpstrf is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dpstrf(uplo blas.Uplo, n int, a []float64, lda int, piv []int, tol float64, work []float64) (rank int, ok bool) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + // Quick return if possible. + if n == 0 { + return 0, true + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(piv) != n: + panic(badLenPiv) + case len(work) < 2*n: + panic(shortWork) + } + + // Get block size. + nb := impl.Ilaenv(1, "DPOTRF", string(uplo), n, -1, -1, -1) + if nb <= 1 || n <= nb { + // Use unblocked code. + return impl.Dpstf2(uplo, n, a, lda, piv, tol, work) + } + + // Initialize piv. + for i := range piv[:n] { + piv[i] = i + } + + // Compute the first pivot. + pvt := 0 + ajj := a[0] + for i := 1; i < n; i++ { + aii := a[i*lda+i] + if aii > ajj { + pvt = i + ajj = aii + } + } + if ajj <= 0 || math.IsNaN(ajj) { + return 0, false + } + + // Compute stopping value if not supplied. + dstop := tol + if dstop < 0 { + dstop = float64(n) * dlamchE * ajj + } + + bi := blas64.Implementation() + // Split work in half, the first half holds dot products. + dots := work[:n] + work2 := work[n : 2*n] + if uplo == blas.Upper { + // Compute the Cholesky factorization Pᵀ * A * P = Uᵀ * U. + for k := 0; k < n; k += nb { + // Account for last block not being nb wide. + jb := min(nb, n-k) + // Set relevant part of dot products to zero. + for i := k; i < n; i++ { + dots[i] = 0 + } + for j := k; j < k+jb; j++ { + // Update dot products and compute possible pivots which are stored + // in the second half of work. + for i := j; i < n; i++ { + if j > k { + tmp := a[(j-1)*lda+i] + dots[i] += tmp * tmp + } + work2[i] = a[i*lda+i] - dots[i] + } + if j > 0 { + // Find the pivot. + pvt = j + ajj = work2[pvt] + for l := j + 1; l < n; l++ { + wl := work2[l] + if wl > ajj { + pvt = l + ajj = wl + } + } + // Test for exit. + if ajj <= dstop || math.IsNaN(ajj) { + a[j*lda+j] = ajj + return j, false + } + } + if j != pvt { + // Swap pivot rows and columns. 
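+					// Only one triangle of A is stored, so the
+					// symmetric interchange of row/column j with
+					// row/column pvt is done piecewise: the diagonal
+					// entries, the segment above j, the segment
+					// beyond pvt, and the middle piece, which is a
+					// row on one side and a column on the other.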
+ a[pvt*lda+pvt] = a[j*lda+j] + bi.Dswap(j, a[j:], lda, a[pvt:], lda) + if pvt < n-1 { + bi.Dswap(n-pvt-1, a[j*lda+(pvt+1):], 1, a[pvt*lda+(pvt+1):], 1) + } + bi.Dswap(pvt-j-1, a[j*lda+(j+1):], 1, a[(j+1)*lda+pvt:], lda) + // Swap dot products and piv. + dots[j], dots[pvt] = dots[pvt], dots[j] + piv[j], piv[pvt] = piv[pvt], piv[j] + } + ajj = math.Sqrt(ajj) + a[j*lda+j] = ajj + // Compute elements j+1:n of row j. + if j < n-1 { + bi.Dgemv(blas.Trans, j-k, n-j-1, + -1, a[k*lda+j+1:], lda, a[k*lda+j:], lda, + 1, a[j*lda+j+1:], 1) + bi.Dscal(n-j-1, 1/ajj, a[j*lda+j+1:], 1) + } + } + // Update trailing matrix. + if k+jb < n { + j := k + jb + bi.Dsyrk(blas.Upper, blas.Trans, n-j, jb, + -1, a[k*lda+j:], lda, 1, a[j*lda+j:], lda) + } + } + } else { + // Compute the Cholesky factorization Pᵀ * A * P = L * Lᵀ. + for k := 0; k < n; k += nb { + // Account for last block not being nb wide. + jb := min(nb, n-k) + // Set relevant part of dot products to zero. + for i := k; i < n; i++ { + dots[i] = 0 + } + for j := k; j < k+jb; j++ { + // Update dot products and compute possible pivots which are stored + // in the second half of work. + for i := j; i < n; i++ { + if j > k { + tmp := a[i*lda+(j-1)] + dots[i] += tmp * tmp + } + work2[i] = a[i*lda+i] - dots[i] + } + if j > 0 { + // Find the pivot. + pvt = j + ajj = work2[pvt] + for l := j + 1; l < n; l++ { + wl := work2[l] + if wl > ajj { + pvt = l + ajj = wl + } + } + // Test for exit. + if ajj <= dstop || math.IsNaN(ajj) { + a[j*lda+j] = ajj + return j, false + } + } + if j != pvt { + // Swap pivot rows and columns. + a[pvt*lda+pvt] = a[j*lda+j] + bi.Dswap(j, a[j*lda:], 1, a[pvt*lda:], 1) + if pvt < n-1 { + bi.Dswap(n-pvt-1, a[(pvt+1)*lda+j:], lda, a[(pvt+1)*lda+pvt:], lda) + } + bi.Dswap(pvt-j-1, a[(j+1)*lda+j:], lda, a[pvt*lda+(j+1):], 1) + // Swap dot products and piv. + dots[j], dots[pvt] = dots[pvt], dots[j] + piv[j], piv[pvt] = piv[pvt], piv[j] + } + ajj = math.Sqrt(ajj) + a[j*lda+j] = ajj + // Compute elements j+1:n of column j. + if j < n-1 { + bi.Dgemv(blas.NoTrans, n-j-1, j-k, + -1, a[(j+1)*lda+k:], lda, a[j*lda+k:], 1, + 1, a[(j+1)*lda+j:], lda) + bi.Dscal(n-j-1, 1/ajj, a[(j+1)*lda+j:], lda) + } + } + // Update trailing matrix. + if k+jb < n { + j := k + jb + bi.Dsyrk(blas.Lower, blas.NoTrans, n-j, jb, + -1, a[j*lda+k:], lda, 1, a[j*lda+j:], lda) + } + } + } + return n, true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dptcon.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dptcon.go new file mode 100644 index 0000000000..cd41e3175a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dptcon.go @@ -0,0 +1,99 @@ +// Copyright ©2023 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" +) + +// Dptcon computes and returns the reciprocal of the condition number (in the +// 1-norm) of a symmetric positive definite tridiagonal matrix A using the +// factorization A = L*D*Lᵀ or A = Uᵀ*D*U computed by Dpttrf. +// +// The reciprocal of the condition number is computed as +// +// rcond = 1 / (anorm * ‖A⁻¹‖) +// +// and ‖A⁻¹‖ is computed by a direct method. +// +// d and e contain, respectively, the n diagonal elements of the diagonal matrix +// D and the (n-1) off-diagonal elements of the unit bidiagonal factor U or L +// from the factorization of A, as computed by Dpttrf. +// +// anorm is the 1-norm of the original matrix A. 
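+//
+// anorm can be obtained, for example, by applying Dlanst with
+// lapack.MaxColumnSum to the d and e of the original matrix before
+// factorization.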
+//
+// work must have length n, otherwise Dptcon will panic.
+func (impl Implementation) Dptcon(n int, d, e []float64, anorm float64, work []float64) (rcond float64) {
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case anorm < 0:
+		panic(badNorm)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		return 1
+	}
+
+	switch {
+	case len(d) < n:
+		panic(shortD)
+	case len(e) < n-1:
+		panic(shortE)
+	case len(work) < n:
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	switch {
+	case anorm == 0:
+		return 0
+	case math.IsNaN(anorm):
+		// Propagate NaN.
+		return anorm
+	case math.IsInf(anorm, 1):
+		return 0
+	}
+
+	// Check that d[0:n] is positive.
+	for _, di := range d[:n] {
+		if di <= 0 {
+			return 0
+		}
+	}
+
+	// Solve M(A) * x = e, where M(A) = (m[i,j]) is given by
+	//
+	//	m[i,j] =  abs(A[i,j]), i == j,
+	//	m[i,j] = -abs(A[i,j]), i != j,
+	//
+	// and e = [1,1,...,1]ᵀ. Note M(A) = M(L)*D*M(L)ᵀ.
+
+	// Solve M(L) * b = e.
+	work[0] = 1
+	for i := 1; i < n; i++ {
+		work[i] = 1 + work[i-1]*math.Abs(e[i-1])
+	}
+
+	// Solve D * M(L)ᵀ * x = b.
+	work[n-1] /= d[n-1]
+	for i := n - 2; i >= 0; i-- {
+		work[i] = work[i]/d[i] + work[i+1]*math.Abs(e[i])
+	}
+
+	// Compute ainvnm = max(x[i]), 0 <= i < n.
+	ix := blas64.Implementation().Idamax(n, work, 1)
+	ainvnm := work[ix]
+
+	// Compute the reciprocal condition number.
+	if ainvnm > 0 {
+		rcond = (1 / ainvnm) / anorm
+	}
+	return rcond
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dpttrs.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dpttrs.go
new file mode 100644
index 0000000000..7bdee6f937
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpttrs.go
@@ -0,0 +1,51 @@
+// Copyright ©2023 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+// Dpttrs solves a tridiagonal system of the form
+//
+//	A * X = B
+//
+// using the L*D*Lᵀ factorization of A computed by Dpttrf. D is a diagonal
+// matrix specified in d, L is a unit bidiagonal matrix whose subdiagonal is
+// specified in e, and X and B are n×nrhs matrices.
+func (impl Implementation) Dpttrs(n, nrhs int, d, e []float64, b []float64, ldb int) {
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case nrhs < 0:
+		panic(nrhsLT0)
+	case ldb < max(1, nrhs):
+		panic(badLdB)
+	}
+
+	// Quick return if possible.
+	if n == 0 || nrhs == 0 {
+		return
+	}
+
+	switch {
+	case len(d) < n:
+		panic(shortD)
+	case len(e) < n-1:
+		panic(shortE)
+	case len(b) < (n-1)*ldb+nrhs:
+		panic(shortB)
+	}
+
+	nb := 1
+	if nrhs > 1 {
+		nb = max(1, impl.Ilaenv(1, "DPTTRS", " ", n, nrhs, -1, -1))
+	}
+
+	if nb >= nrhs {
+		impl.dptts2(n, nrhs, d, e, b, ldb)
+	} else {
+		for j := 0; j < nrhs; j += nb {
+			jb := min(nrhs-j, nb)
+			impl.dptts2(n, jb, d, e, b[j:], ldb)
+		}
+	}
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dptts2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dptts2.go
new file mode 100644
index 0000000000..ff1df168f2
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dptts2.go
@@ -0,0 +1,39 @@
+// Copyright ©2023 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas/blas64"
+
+// dptts2 solves a tridiagonal system of the form
+//
+//	A * X = B
+//
+// using the L*D*Lᵀ factorization of A computed by Dpttrf. D is a diagonal
+// matrix specified in d, L is a unit bidiagonal matrix whose subdiagonal is
+// specified in e, and X and B are n×nrhs matrices.
+func (impl Implementation) dptts2(n, nrhs int, d, e []float64, b []float64, ldb int) {
+	// Quick return if possible.
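+	// For n == 1, A reduces to the scalar d[0] and each right-hand
+	// side is simply scaled by 1/d[0].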
+ if n <= 1 { + if n == 1 { + bi := blas64.Implementation() + bi.Dscal(nrhs, 1/d[0], b, 1) + } + return + } + + // Solve A * X = B using the factorization A = L*D*Lᵀ, overwriting each + // right hand side vector with its solution. + for j := 0; j < nrhs; j++ { + // Solve L * x = b. + for i := 1; i < n; i++ { + b[i*ldb+j] -= b[(i-1)*ldb+j] * e[i-1] + } + // Solve D * Lᵀ * x = b. + b[(n-1)*ldb+j] /= d[n-1] + for i := n - 2; i >= 0; i-- { + b[i*ldb+j] = b[i*ldb+j]/d[i] - b[(i+1)*ldb+j]*e[i] + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/drscl.go b/vendor/gonum.org/v1/gonum/lapack/gonum/drscl.go new file mode 100644 index 0000000000..b2772dbc22 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/drscl.go @@ -0,0 +1,63 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" +) + +// Drscl multiplies the vector x by 1/a being careful to avoid overflow or +// underflow where possible. +// +// Drscl is an internal routine. It is exported for testing purposes. +func (impl Implementation) Drscl(n int, a float64, x []float64, incX int) { + switch { + case n < 0: + panic(nLT0) + case incX <= 0: + panic(badIncX) + } + + // Quick return if possible. + if n == 0 { + return + } + + if len(x) < 1+(n-1)*incX { + panic(shortX) + } + + bi := blas64.Implementation() + + cden := a + cnum := 1.0 + smlnum := dlamchS + bignum := 1 / smlnum + for { + cden1 := cden * smlnum + cnum1 := cnum / bignum + var mul float64 + var done bool + switch { + case cnum != 0 && math.Abs(cden1) > math.Abs(cnum): + mul = smlnum + done = false + cden = cden1 + case math.Abs(cnum1) > math.Abs(cden): + mul = bignum + done = false + cnum = cnum1 + default: + mul = cnum / cden + done = true + } + bi.Dscal(n, mul, x, incX) + if done { + break + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dsteqr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dsteqr.go new file mode 100644 index 0000000000..d6c7861ab5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dsteqr.go @@ -0,0 +1,376 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dsteqr computes the eigenvalues and optionally the eigenvectors of a symmetric +// tridiagonal matrix using the implicit QL or QR method. The eigenvectors of a +// full or band symmetric matrix can also be found if Dsytrd, Dsptrd, or Dsbtrd +// have been used to reduce this matrix to tridiagonal form. +// +// d, on entry, contains the diagonal elements of the tridiagonal matrix. On exit, +// d contains the eigenvalues in ascending order. d must have length n and +// Dsteqr will panic otherwise. +// +// e, on entry, contains the off-diagonal elements of the tridiagonal matrix on +// entry, and is overwritten during the call to Dsteqr. e must have length n-1 and +// Dsteqr will panic otherwise. +// +// z, on entry, contains the n×n orthogonal matrix used in the reduction to +// tridiagonal form if compz == lapack.EVOrig. 
On exit, if +// compz == lapack.EVOrig, z contains the orthonormal eigenvectors of the +// original symmetric matrix, and if compz == lapack.EVTridiag, z contains the +// orthonormal eigenvectors of the symmetric tridiagonal matrix. z is not used +// if compz == lapack.EVCompNone. +// +// work must have length at least max(1, 2*n-2) if the eigenvectors are computed, +// and Dsteqr will panic otherwise. +// +// Dsteqr is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dsteqr(compz lapack.EVComp, n int, d, e, z []float64, ldz int, work []float64) (ok bool) { + switch { + case compz != lapack.EVCompNone && compz != lapack.EVTridiag && compz != lapack.EVOrig: + panic(badEVComp) + case n < 0: + panic(nLT0) + case ldz < 1, compz != lapack.EVCompNone && ldz < n: + panic(badLdZ) + } + + // Quick return if possible. + if n == 0 { + return true + } + + switch { + case len(d) < n: + panic(shortD) + case len(e) < n-1: + panic(shortE) + case compz != lapack.EVCompNone && len(z) < (n-1)*ldz+n: + panic(shortZ) + case compz != lapack.EVCompNone && len(work) < max(1, 2*n-2): + panic(shortWork) + } + + var icompz int + if compz == lapack.EVOrig { + icompz = 1 + } else if compz == lapack.EVTridiag { + icompz = 2 + } + + if n == 1 { + if icompz == 2 { + z[0] = 1 + } + return true + } + + bi := blas64.Implementation() + + eps := dlamchE + eps2 := eps * eps + safmin := dlamchS + safmax := 1 / safmin + ssfmax := math.Sqrt(safmax) / 3 + ssfmin := math.Sqrt(safmin) / eps2 + + // Compute the eigenvalues and eigenvectors of the tridiagonal matrix. + if icompz == 2 { + impl.Dlaset(blas.All, n, n, 0, 1, z, ldz) + } + const maxit = 30 + nmaxit := n * maxit + + jtot := 0 + + // Determine where the matrix splits and choose QL or QR iteration for each + // block, according to whether top or bottom diagonal element is smaller. + l1 := 0 + nm1 := n - 1 + + type scaletype int + const ( + down scaletype = iota + 1 + up + ) + var iscale scaletype + + for { + if l1 > n-1 { + // Order eigenvalues and eigenvectors. + if icompz == 0 { + impl.Dlasrt(lapack.SortIncreasing, n, d) + } else { + // TODO(btracey): Consider replacing this sort with a call to sort.Sort. + for ii := 1; ii < n; ii++ { + i := ii - 1 + k := i + p := d[i] + for j := ii; j < n; j++ { + if d[j] < p { + k = j + p = d[j] + } + } + if k != i { + d[k] = d[i] + d[i] = p + bi.Dswap(n, z[i:], ldz, z[k:], ldz) + } + } + } + return true + } + if l1 > 0 { + e[l1-1] = 0 + } + var m int + if l1 <= nm1 { + for m = l1; m < nm1; m++ { + test := math.Abs(e[m]) + if test == 0 { + break + } + if test <= (math.Sqrt(math.Abs(d[m]))*math.Sqrt(math.Abs(d[m+1])))*eps { + e[m] = 0 + break + } + } + } + l := l1 + lsv := l + lend := m + lendsv := lend + l1 = m + 1 + if lend == l { + continue + } + + // Scale submatrix in rows and columns L to Lend + anorm := impl.Dlanst(lapack.MaxAbs, lend-l+1, d[l:], e[l:]) + switch { + case anorm == 0: + continue + case anorm > ssfmax: + iscale = down + // Pretend that d and e are matrices with 1 column. + impl.Dlascl(lapack.General, 0, 0, anorm, ssfmax, lend-l+1, 1, d[l:], 1) + impl.Dlascl(lapack.General, 0, 0, anorm, ssfmax, lend-l, 1, e[l:], 1) + case anorm < ssfmin: + iscale = up + impl.Dlascl(lapack.General, 0, 0, anorm, ssfmin, lend-l+1, 1, d[l:], 1) + impl.Dlascl(lapack.General, 0, 0, anorm, ssfmin, lend-l, 1, e[l:], 1) + } + + // Choose between QL and QR. + if math.Abs(d[lend]) < math.Abs(d[l]) { + lend = lsv + l = lendsv + } + if lend > l { + // QL Iteration. Look for small subdiagonal element. 
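+			// A subdiagonal entry e[m] is treated as negligible, and
+			// the problem deflated at row m, once e[m]² is small
+			// relative to |d[m]|·|d[m+1]| at working precision
+			// (safmin guards against underflow in the product).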
+ for { + if l != lend { + for m = l; m < lend; m++ { + v := math.Abs(e[m]) + if v*v <= (eps2*math.Abs(d[m]))*math.Abs(d[m+1])+safmin { + break + } + } + } else { + m = lend + } + if m < lend { + e[m] = 0 + } + p := d[l] + if m == l { + // Eigenvalue found. + l++ + if l > lend { + break + } + continue + } + + // If remaining matrix is 2×2, use Dlae2 to compute its eigensystem. + if m == l+1 { + if icompz > 0 { + d[l], d[l+1], work[l], work[n-1+l] = impl.Dlaev2(d[l], e[l], d[l+1]) + impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, + n, 2, work[l:], work[n-1+l:], z[l:], ldz) + } else { + d[l], d[l+1] = impl.Dlae2(d[l], e[l], d[l+1]) + } + e[l] = 0 + l += 2 + if l > lend { + break + } + continue + } + + if jtot == nmaxit { + break + } + jtot++ + + // Form shift + g := (d[l+1] - p) / (2 * e[l]) + r := impl.Dlapy2(g, 1) + g = d[m] - p + e[l]/(g+math.Copysign(r, g)) + s := 1.0 + c := 1.0 + p = 0.0 + + // Inner loop + for i := m - 1; i >= l; i-- { + f := s * e[i] + b := c * e[i] + c, s, r = impl.Dlartg(g, f) + if i != m-1 { + e[i+1] = r + } + g = d[i+1] - p + r = (d[i]-g)*s + 2*c*b + p = s * r + d[i+1] = g + p + g = c*r - b + + // If eigenvectors are desired, then save rotations. + if icompz > 0 { + work[i] = c + work[n-1+i] = -s + } + } + // If eigenvectors are desired, then apply saved rotations. + if icompz > 0 { + mm := m - l + 1 + impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, + n, mm, work[l:], work[n-1+l:], z[l:], ldz) + } + d[l] -= p + e[l] = g + } + } else { + // QR Iteration. + // Look for small superdiagonal element. + for { + if l != lend { + for m = l; m > lend; m-- { + v := math.Abs(e[m-1]) + if v*v <= (eps2*math.Abs(d[m])*math.Abs(d[m-1]) + safmin) { + break + } + } + } else { + m = lend + } + if m > lend { + e[m-1] = 0 + } + p := d[l] + if m == l { + // Eigenvalue found + l-- + if l < lend { + break + } + continue + } + + // If remaining matrix is 2×2, use Dlae2 to compute its eigenvalues. + if m == l-1 { + if icompz > 0 { + d[l-1], d[l], work[m], work[n-1+m] = impl.Dlaev2(d[l-1], e[l-1], d[l]) + impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, + n, 2, work[m:], work[n-1+m:], z[l-1:], ldz) + } else { + d[l-1], d[l] = impl.Dlae2(d[l-1], e[l-1], d[l]) + } + e[l-1] = 0 + l -= 2 + if l < lend { + break + } + continue + } + if jtot == nmaxit { + break + } + jtot++ + + // Form shift. + g := (d[l-1] - p) / (2 * e[l-1]) + r := impl.Dlapy2(g, 1) + g = d[m] - p + (e[l-1])/(g+math.Copysign(r, g)) + s := 1.0 + c := 1.0 + p = 0.0 + + // Inner loop. + for i := m; i < l; i++ { + f := s * e[i] + b := c * e[i] + c, s, r = impl.Dlartg(g, f) + if i != m { + e[i-1] = r + } + g = d[i] - p + r = (d[i+1]-g)*s + 2*c*b + p = s * r + d[i] = g + p + g = c*r - b + + // If eigenvectors are desired, then save rotations. + if icompz > 0 { + work[i] = c + work[n-1+i] = s + } + } + + // If eigenvectors are desired, then apply saved rotations. + if icompz > 0 { + mm := l - m + 1 + impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, + n, mm, work[m:], work[n-1+m:], z[m:], ldz) + } + d[l] -= p + e[l-1] = g + } + } + + // Undo scaling if necessary. + switch iscale { + case down: + // Pretend that d and e are matrices with 1 column. 
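+		// Dlascl multiplies d and e by anorm/ssfmax without
+		// intermediate overflow, undoing the earlier scale-down of
+		// this submatrix.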
+ impl.Dlascl(lapack.General, 0, 0, ssfmax, anorm, lendsv-lsv+1, 1, d[lsv:], 1) + impl.Dlascl(lapack.General, 0, 0, ssfmax, anorm, lendsv-lsv, 1, e[lsv:], 1) + case up: + impl.Dlascl(lapack.General, 0, 0, ssfmin, anorm, lendsv-lsv+1, 1, d[lsv:], 1) + impl.Dlascl(lapack.General, 0, 0, ssfmin, anorm, lendsv-lsv, 1, e[lsv:], 1) + } + + // Check for no convergence to an eigenvalue after a total of n*maxit iterations. + if jtot >= nmaxit { + break + } + } + for i := 0; i < n-1; i++ { + if e[i] != 0 { + return false + } + } + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dsterf.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dsterf.go new file mode 100644 index 0000000000..dc1e178dfa --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dsterf.go @@ -0,0 +1,285 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/lapack" +) + +// Dsterf computes all eigenvalues of a symmetric tridiagonal matrix using the +// Pal-Walker-Kahan variant of the QL or QR algorithm. +// +// d contains the diagonal elements of the tridiagonal matrix on entry, and +// contains the eigenvalues in ascending order on exit. d must have length at +// least n, or Dsterf will panic. +// +// e contains the off-diagonal elements of the tridiagonal matrix on entry, and is +// overwritten during the call to Dsterf. e must have length of at least n-1 or +// Dsterf will panic. +// +// Dsterf is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dsterf(n int, d, e []float64) (ok bool) { + if n < 0 { + panic(nLT0) + } + + // Quick return if possible. + if n == 0 { + return true + } + + switch { + case len(d) < n: + panic(shortD) + case len(e) < n-1: + panic(shortE) + } + + if n == 1 { + return true + } + + const ( + none = 0 // The values are not scaled. + down = 1 // The values are scaled below ssfmax threshold. + up = 2 // The values are scaled below ssfmin threshold. + ) + + // Determine the unit roundoff for this environment. + eps := dlamchE + eps2 := eps * eps + safmin := dlamchS + safmax := 1 / safmin + ssfmax := math.Sqrt(safmax) / 3 + ssfmin := math.Sqrt(safmin) / eps2 + + // Compute the eigenvalues of the tridiagonal matrix. + maxit := 30 + nmaxit := n * maxit + jtot := 0 + + l1 := 0 + + for { + if l1 > n-1 { + impl.Dlasrt(lapack.SortIncreasing, n, d) + return true + } + if l1 > 0 { + e[l1-1] = 0 + } + var m int + for m = l1; m < n-1; m++ { + if math.Abs(e[m]) <= math.Sqrt(math.Abs(d[m]))*math.Sqrt(math.Abs(d[m+1]))*eps { + e[m] = 0 + break + } + } + + l := l1 + lsv := l + lend := m + lendsv := lend + l1 = m + 1 + if lend == 0 { + continue + } + + // Scale submatrix in rows and columns l to lend. + anorm := impl.Dlanst(lapack.MaxAbs, lend-l+1, d[l:], e[l:]) + iscale := none + if anorm == 0 { + continue + } + if anorm > ssfmax { + iscale = down + impl.Dlascl(lapack.General, 0, 0, anorm, ssfmax, lend-l+1, 1, d[l:], n) + impl.Dlascl(lapack.General, 0, 0, anorm, ssfmax, lend-l, 1, e[l:], n) + } else if anorm < ssfmin { + iscale = up + impl.Dlascl(lapack.General, 0, 0, anorm, ssfmin, lend-l+1, 1, d[l:], n) + impl.Dlascl(lapack.General, 0, 0, anorm, ssfmin, lend-l, 1, e[l:], n) + } + + el := e[l:lend] + for i, v := range el { + el[i] *= v + } + + // Choose between QL and QR iteration. + if math.Abs(d[lend]) < math.Abs(d[l]) { + lend = lsv + l = lendsv + } + if lend >= l { + // QL Iteration. 
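+			// The entries of e were squared above, so the deflation
+			// tests below compare e[m] (already e[m]²) against
+			// eps²·|d[m]·d[m+1]|, and the shift recovers the original
+			// off-diagonal via math.Sqrt(e[l]).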
+ // Look for small sub-diagonal element. + for { + if l != lend { + for m = l; m < lend; m++ { + if math.Abs(e[m]) <= eps2*(math.Abs(d[m]*d[m+1])) { + break + } + } + } else { + m = lend + } + if m < lend { + e[m] = 0 + } + p := d[l] + if m == l { + // Eigenvalue found. + l++ + if l > lend { + break + } + continue + } + // If remaining matrix is 2 by 2, use Dlae2 to compute its eigenvalues. + if m == l+1 { + d[l], d[l+1] = impl.Dlae2(d[l], math.Sqrt(e[l]), d[l+1]) + e[l] = 0 + l += 2 + if l > lend { + break + } + continue + } + if jtot == nmaxit { + break + } + jtot++ + + // Form shift. + rte := math.Sqrt(e[l]) + sigma := (d[l+1] - p) / (2 * rte) + r := impl.Dlapy2(sigma, 1) + sigma = p - (rte / (sigma + math.Copysign(r, sigma))) + + c := 1.0 + s := 0.0 + gamma := d[m] - sigma + p = gamma * gamma + + // Inner loop. + for i := m - 1; i >= l; i-- { + bb := e[i] + r := p + bb + if i != m-1 { + e[i+1] = s * r + } + oldc := c + c = p / r + s = bb / r + oldgam := gamma + alpha := d[i] + gamma = c*(alpha-sigma) - s*oldgam + d[i+1] = oldgam + (alpha - gamma) + if c != 0 { + p = (gamma * gamma) / c + } else { + p = oldc * bb + } + } + e[l] = s * p + d[l] = sigma + gamma + } + } else { + for { + // QR Iteration. + // Look for small super-diagonal element. + for m = l; m > lend; m-- { + if math.Abs(e[m-1]) <= eps2*math.Abs(d[m]*d[m-1]) { + break + } + } + if m > lend { + e[m-1] = 0 + } + p := d[l] + if m == l { + // Eigenvalue found. + l-- + if l < lend { + break + } + continue + } + + // If remaining matrix is 2 by 2, use Dlae2 to compute its eigenvalues. + if m == l-1 { + d[l], d[l-1] = impl.Dlae2(d[l], math.Sqrt(e[l-1]), d[l-1]) + e[l-1] = 0 + l -= 2 + if l < lend { + break + } + continue + } + if jtot == nmaxit { + break + } + jtot++ + + // Form shift. + rte := math.Sqrt(e[l-1]) + sigma := (d[l-1] - p) / (2 * rte) + r := impl.Dlapy2(sigma, 1) + sigma = p - (rte / (sigma + math.Copysign(r, sigma))) + + c := 1.0 + s := 0.0 + gamma := d[m] - sigma + p = gamma * gamma + + // Inner loop. + for i := m; i < l; i++ { + bb := e[i] + r := p + bb + if i != m { + e[i-1] = s * r + } + oldc := c + c = p / r + s = bb / r + oldgam := gamma + alpha := d[i+1] + gamma = c*(alpha-sigma) - s*oldgam + d[i] = oldgam + alpha - gamma + if c != 0 { + p = (gamma * gamma) / c + } else { + p = oldc * bb + } + } + e[l-1] = s * p + d[l] = sigma + gamma + } + } + + // Undo scaling if necessary + switch iscale { + case down: + impl.Dlascl(lapack.General, 0, 0, ssfmax, anorm, lendsv-lsv+1, 1, d[lsv:], n) + case up: + impl.Dlascl(lapack.General, 0, 0, ssfmin, anorm, lendsv-lsv+1, 1, d[lsv:], n) + } + + // Check for no convergence to an eigenvalue after a total of n*maxit iterations. + if jtot >= nmaxit { + break + } + } + for _, v := range e[:n-1] { + if v != 0 { + return false + } + } + impl.Dlasrt(lapack.SortIncreasing, n, d) + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dsyev.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dsyev.go new file mode 100644 index 0000000000..5f57f3a5c9 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dsyev.go @@ -0,0 +1,130 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dsyev computes all eigenvalues and, optionally, the eigenvectors of a real +// symmetric matrix A. 
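+//
+// Dsyev first reduces A to symmetric tridiagonal form with Dsytrd, then
+// computes the eigenvalues with Dsterf (jobz == lapack.EVNone) or the full
+// eigendecomposition with Dorgtr followed by Dsteqr (jobz == lapack.EVCompute).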
+// +// w contains the eigenvalues in ascending order upon return. w must have length +// at least n, and Dsyev will panic otherwise. +// +// On entry, a contains the elements of the symmetric matrix A in the triangular +// portion specified by uplo. If jobz == lapack.EVCompute, a contains the +// orthonormal eigenvectors of A on exit, otherwise jobz must be lapack.EVNone +// and on exit the specified triangular region is overwritten. +// +// work is temporary storage, and lwork specifies the usable memory length. At minimum, +// lwork >= 3*n-1, and Dsyev will panic otherwise. The amount of blocking is +// limited by the usable length. If lwork == -1, instead of computing Dsyev the +// optimal work length is stored into work[0]. +func (impl Implementation) Dsyev(jobz lapack.EVJob, uplo blas.Uplo, n int, a []float64, lda int, w, work []float64, lwork int) (ok bool) { + switch { + case jobz != lapack.EVNone && jobz != lapack.EVCompute: + panic(badEVJob) + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + case lwork < max(1, 3*n-1) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. + if n == 0 { + return true + } + + var opts string + if uplo == blas.Upper { + opts = "U" + } else { + opts = "L" + } + nb := impl.Ilaenv(1, "DSYTRD", opts, n, -1, -1, -1) + lworkopt := max(1, (nb+2)*n) + if lwork == -1 { + work[0] = float64(lworkopt) + return + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(w) < n: + panic(shortW) + } + + if n == 1 { + w[0] = a[0] + work[0] = 2 + if jobz == lapack.EVCompute { + a[0] = 1 + } + return true + } + + safmin := dlamchS + eps := dlamchP + smlnum := safmin / eps + bignum := 1 / smlnum + rmin := math.Sqrt(smlnum) + rmax := math.Sqrt(bignum) + + // Scale matrix to allowable range, if necessary. + anrm := impl.Dlansy(lapack.MaxAbs, uplo, n, a, lda, work) + scaled := false + var sigma float64 + if anrm > 0 && anrm < rmin { + scaled = true + sigma = rmin / anrm + } else if anrm > rmax { + scaled = true + sigma = rmax / anrm + } + if scaled { + kind := lapack.LowerTri + if uplo == blas.Upper { + kind = lapack.UpperTri + } + impl.Dlascl(kind, 0, 0, 1, sigma, n, n, a, lda) + } + var inde int + indtau := inde + n + indwork := indtau + n + llwork := lwork - indwork + impl.Dsytrd(uplo, n, a, lda, w, work[inde:], work[indtau:], work[indwork:], llwork) + + // For eigenvalues only, call Dsterf. For eigenvectors, first call Dorgtr + // to generate the orthogonal matrix, then call Dsteqr. + if jobz == lapack.EVNone { + ok = impl.Dsterf(n, w, work[inde:]) + } else { + impl.Dorgtr(uplo, n, a, lda, work[indtau:], work[indwork:], llwork) + ok = impl.Dsteqr(lapack.EVComp(jobz), n, w, work[inde:], a, lda, work[indtau:]) + } + if !ok { + return false + } + + // If the matrix was scaled, then rescale eigenvalues appropriately. + if scaled { + bi := blas64.Implementation() + bi.Dscal(n, 1/sigma, w, 1) + } + work[0] = float64(lworkopt) + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dsytd2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dsytd2.go new file mode 100644 index 0000000000..03e7cc07b0 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dsytd2.go @@ -0,0 +1,147 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+package gonum
+
+import (
+ "gonum.org/v1/gonum/blas"
+ "gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dsytd2 reduces a symmetric n×n matrix A to symmetric tridiagonal form T by
+// an orthogonal similarity transformation
+//
+// Qᵀ * A * Q = T
+//
+// On entry, the matrix is contained in the specified triangle of a. On exit,
+// if uplo == blas.Upper, the diagonal and first super-diagonal of a are
+// overwritten with the elements of T. The elements above the first super-diagonal
+// are overwritten with the elementary reflectors that are used with
+// the elements written to tau in order to construct Q. If uplo == blas.Lower,
+// the elements are written in the lower triangular region.
+//
+// d must have length at least n. e and tau must have length at least n-1. Dsytd2
+// will panic if these sizes are not met.
+//
+// Q is represented as a product of elementary reflectors.
+// If uplo == blas.Upper
+//
+// Q = H_{n-2} * ... * H_1 * H_0
+//
+// and if uplo == blas.Lower
+//
+// Q = H_0 * H_1 * ... * H_{n-2}
+//
+// where
+//
+// H_i = I - tau * v * vᵀ
+//
+// where tau is stored in tau[i], and v is stored in a.
+//
+// If uplo == blas.Upper, v[0:i-1] is stored in A[0:i-1,i+1], v[i] = 1, and
+// v[i+1:] = 0. The elements of a are
+//
+// [ d e v2 v3 v4]
+// [ d e v3 v4]
+// [ d e v4]
+// [ d e]
+// [ d]
+//
+// If uplo == blas.Lower, v[0:i+1] = 0, v[i+1] = 1, and v[i+2:] is stored in
+// A[i+2:n,i].
+// The elements of a are
+//
+// [ d ]
+// [ e d ]
+// [v1 e d ]
+// [v1 v2 e d ]
+// [v1 v2 v3 e d]
+//
+// Dsytd2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dsytd2(uplo blas.Uplo, n int, a []float64, lda int, d, e, tau []float64) {
+ switch {
+ case uplo != blas.Upper && uplo != blas.Lower:
+ panic(badUplo)
+ case n < 0:
+ panic(nLT0)
+ case lda < max(1, n):
+ panic(badLdA)
+ }
+
+ // Quick return if possible.
+ if n == 0 {
+ return
+ }
+
+ switch {
+ case len(a) < (n-1)*lda+n:
+ panic(shortA)
+ case len(d) < n:
+ panic(shortD)
+ case len(e) < n-1:
+ panic(shortE)
+ case len(tau) < n-1:
+ panic(shortTau)
+ }
+
+ bi := blas64.Implementation()
+
+ if uplo == blas.Upper {
+ // Reduce the upper triangle of A.
+ for i := n - 2; i >= 0; i-- {
+ // Generate elementary reflector H_i = I - tau * v * vᵀ to
+ // annihilate A[0:i, i+1].
+ var taui float64
+ a[i*lda+i+1], taui = impl.Dlarfg(i+1, a[i*lda+i+1], a[i+1:], lda)
+ e[i] = a[i*lda+i+1]
+ if taui != 0 {
+ // Apply H_i from both sides to A[0:i,0:i].
+ a[i*lda+i+1] = 1
+
+ // Compute x := tau * A * v storing x in tau[0:i].
+ bi.Dsymv(uplo, i+1, taui, a, lda, a[i+1:], lda, 0, tau, 1)
+
+ // Compute w := x - 1/2 * tau * (xᵀ * v) * v.
+ alpha := -0.5 * taui * bi.Ddot(i+1, tau, 1, a[i+1:], lda)
+ bi.Daxpy(i+1, alpha, a[i+1:], lda, tau, 1)
+
+ // Apply the transformation as a rank-2 update
+ // A = A - v * wᵀ - w * vᵀ.
+ bi.Dsyr2(uplo, i+1, -1, a[i+1:], lda, tau, 1, a, lda)
+ a[i*lda+i+1] = e[i]
+ }
+ d[i+1] = a[(i+1)*lda+i+1]
+ tau[i] = taui
+ }
+ d[0] = a[0]
+ return
+ }
+ // Reduce the lower triangle of A.
+ for i := 0; i < n-1; i++ {
+ // Generate elementary reflector H_i = I - tau * v * vᵀ to
+ // annihilate A[i+2:n, i].
+ var taui float64
+ a[(i+1)*lda+i], taui = impl.Dlarfg(n-i-1, a[(i+1)*lda+i], a[min(i+2, n-1)*lda+i:], lda)
+ e[i] = a[(i+1)*lda+i]
+ if taui != 0 {
+ // Apply H_i from both sides to A[i+1:n, i+1:n].
+ a[(i+1)*lda+i] = 1
+
+ // Compute x := tau * A * v, storing x in tau[i:n-1].
+ bi.Dsymv(uplo, n-i-1, taui, a[(i+1)*lda+i+1:], lda, a[(i+1)*lda+i:], lda, 0, tau[i:], 1)
+
+ // Compute w := x - 1/2 * tau * (xᵀ * v) * v.
+ alpha := -0.5 * taui * bi.Ddot(n-i-1, tau[i:], 1, a[(i+1)*lda+i:], lda)
+ bi.Daxpy(n-i-1, alpha, a[(i+1)*lda+i:], lda, tau[i:], 1)
+
+ // Apply the transformation as a rank-2 update
+ // A = A - v * wᵀ - w * vᵀ.
+ bi.Dsyr2(uplo, n-i-1, -1, a[(i+1)*lda+i:], lda, tau[i:], 1, a[(i+1)*lda+i+1:], lda)
+ a[(i+1)*lda+i] = e[i]
+ }
+ d[i] = a[i*lda+i]
+ tau[i] = taui
+ }
+ d[n-1] = a[(n-1)*lda+n-1]
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dsytrd.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dsytrd.go
new file mode 100644
index 0000000000..74d2287ed2
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dsytrd.go
@@ -0,0 +1,184 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+ "gonum.org/v1/gonum/blas"
+ "gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dsytrd reduces a symmetric n×n matrix A to symmetric tridiagonal form by an
+// orthogonal similarity transformation
+//
+// Qᵀ * A * Q = T
+//
+// where Q is an orthogonal matrix and T is symmetric and tridiagonal.
+//
+// On entry, a contains the elements of the input matrix in the triangle specified
+// by uplo. On exit, the diagonal and sub/super-diagonal are overwritten by the
+// corresponding elements of the tridiagonal matrix T. The remaining elements in
+// the triangle, along with the array tau, contain the data to construct Q as
+// the product of elementary reflectors.
+//
+// If uplo == blas.Upper, Q is constructed with
+//
+// Q = H_{n-2} * ... * H_1 * H_0
+//
+// where
+//
+// H_i = I - tau_i * v * vᵀ
+//
+// v is constructed as v[i+1:n] = 0, v[i] = 1, v[0:i-1] is stored in A[0:i-1, i+1].
+// The elements of A are
+//
+// [ d e v1 v2 v3]
+// [ d e v2 v3]
+// [ d e v3]
+// [ d e]
+// [ d]
+//
+// If uplo == blas.Lower, Q is constructed with
+//
+// Q = H_0 * H_1 * ... * H_{n-2}
+//
+// where
+//
+// H_i = I - tau_i * v * vᵀ
+//
+// v is constructed as v[0:i+1] = 0, v[i+1] = 1, v[i+2:n] is stored in A[i+2:n, i].
+// The elements of A are
+//
+// [ d ]
+// [ e d ]
+// [v0 e d ]
+// [v0 v1 e d ]
+// [v0 v1 v2 e d]
+//
+// d must have length n, and e and tau must have length n-1. Dsytrd will panic if
+// these conditions are not met.
+//
+// work is temporary storage, and lwork specifies the usable memory length. At minimum,
+// lwork >= 1, and Dsytrd will panic otherwise. The amount of blocking is
+// limited by the usable length.
+// If lwork == -1, instead of computing Dsytrd the optimal work length is stored
+// into work[0].
+//
+// Dsytrd is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dsytrd(uplo blas.Uplo, n int, a []float64, lda int, d, e, tau, work []float64, lwork int) {
+ switch {
+ case uplo != blas.Upper && uplo != blas.Lower:
+ panic(badUplo)
+ case n < 0:
+ panic(nLT0)
+ case lda < max(1, n):
+ panic(badLdA)
+ case lwork < 1 && lwork != -1:
+ panic(badLWork)
+ case len(work) < max(1, lwork):
+ panic(shortWork)
+ }
+
+ // Quick return if possible.
+ if n == 0 {
+ work[0] = 1
+ return
+ }
+
+ nb := impl.Ilaenv(1, "DSYTRD", string(uplo), n, -1, -1, -1)
+ lworkopt := n * nb
+ if lwork == -1 {
+ work[0] = float64(lworkopt)
+ return
+ }
+
+ switch {
+ case len(a) < (n-1)*lda+n:
+ panic(shortA)
+ case len(d) < n:
+ panic(shortD)
+ case len(e) < n-1:
+ panic(shortE)
+ case len(tau) < n-1:
+ panic(shortTau)
+ }
+
+ bi := blas64.Implementation()
+
+ nx := n
+ iws := 1
+ var ldwork int
+ if 1 < nb && nb < n {
+ // Determine when to cross over from blocked to unblocked code. The last
+ // block is always handled by unblocked code.
+ nx = max(nb, impl.Ilaenv(3, "DSYTRD", string(uplo), n, -1, -1, -1))
+ if nx < n {
+ // Determine if workspace is large enough for blocked code.
+ ldwork = nb
+ iws = n * ldwork
+ if lwork < iws {
+ // Not enough workspace to use optimal nb: determine the minimum
+ // value of nb and reduce nb or force use of unblocked code by
+ // setting nx = n.
+ nb = max(lwork/n, 1)
+ nbmin := impl.Ilaenv(2, "DSYTRD", string(uplo), n, -1, -1, -1)
+ if nb < nbmin {
+ nx = n
+ }
+ }
+ } else {
+ nx = n
+ }
+ } else {
+ nb = 1
+ }
+ ldwork = nb
+
+ if uplo == blas.Upper {
+ // Reduce the upper triangle of A. Columns 0:kk are handled by the
+ // unblocked method.
+ var i int
+ kk := n - ((n-nx+nb-1)/nb)*nb
+ for i = n - nb; i >= kk; i -= nb {
+ // Reduce columns i:i+nb to tridiagonal form and form the matrix W
+ // which is needed to update the unreduced part of the matrix.
+ impl.Dlatrd(uplo, i+nb, nb, a, lda, e, tau, work, ldwork)
+
+ // Update the unreduced submatrix A[0:i-1,0:i-1], using an update
+ // of the form A = A - V*Wᵀ - W*Vᵀ.
+ bi.Dsyr2k(uplo, blas.NoTrans, i, nb, -1, a[i:], lda, work, ldwork, 1, a, lda)
+
+ // Copy superdiagonal elements back into A, and diagonal elements into D.
+ for j := i; j < i+nb; j++ {
+ a[(j-1)*lda+j] = e[j-1]
+ d[j] = a[j*lda+j]
+ }
+ }
+ // Use unblocked code to reduce the last or only block
+ // check that i == kk.
+ impl.Dsytd2(uplo, kk, a, lda, d, e, tau)
+ } else {
+ var i int
+ // Reduce the lower triangle of A.
+ for i = 0; i < n-nx; i += nb {
+ // Reduce columns i:i+nb to tridiagonal form and form the matrix W
+ // which is needed to update the unreduced part of the matrix.
+ impl.Dlatrd(uplo, n-i, nb, a[i*lda+i:], lda, e[i:], tau[i:], work, ldwork)
+
+ // Update the unreduced submatrix A[i+ib:n, i+ib:n], using an update
+ // of the form A = A - V*Wᵀ - W*Vᵀ.
+ bi.Dsyr2k(uplo, blas.NoTrans, n-i-nb, nb, -1, a[(i+nb)*lda+i:], lda,
+ work[nb*ldwork:], ldwork, 1, a[(i+nb)*lda+i+nb:], lda)
+
+ // Copy subdiagonal elements back into A, and diagonal elements into D.
+ for j := i; j < i+nb; j++ {
+ a[(j+1)*lda+j] = e[j]
+ d[j] = a[j*lda+j]
+ }
+ }
+ // Use unblocked code to reduce the last or only block.
+ impl.Dsytd2(uplo, n-i, a[i*lda+i:], lda, d[i:], e[i:], tau[i:])
+ }
+ work[0] = float64(iws)
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dtbtrs.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dtbtrs.go
new file mode 100644
index 0000000000..6b56d9e0f0
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dtbtrs.go
@@ -0,0 +1,77 @@
+// Copyright ©2020 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+ +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dtbtrs solves a triangular system of the form +// +// A * X = B if trans == blas.NoTrans +// Aᵀ * X = B if trans == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular band matrix with kd super- or subdiagonals, and +// B is an n×nrhs matrix. +// +// Dtbtrs returns whether A is non-singular. If A is singular, no solution X is +// computed. +func (impl Implementation) Dtbtrs(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n, kd, nrhs int, a []float64, lda int, b []float64, ldb int) (ok bool) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: + panic(badTrans) + case diag != blas.NonUnit && diag != blas.Unit: + panic(badDiag) + case n < 0: + panic(nLT0) + case kd < 0: + panic(kdLT0) + case nrhs < 0: + panic(nrhsLT0) + case lda < kd+1: + panic(badLdA) + case ldb < max(1, nrhs): + panic(badLdB) + } + + // Quick return if possible. + if n == 0 { + return true + } + + switch { + case len(a) < (n-1)*lda+kd+1: + panic(shortA) + case len(b) < (n-1)*ldb+nrhs: + panic(shortB) + } + + // Check for singularity. + if diag == blas.NonUnit { + if uplo == blas.Upper { + for i := 0; i < n; i++ { + if a[i*lda] == 0 { + return false + } + } + } else { + for i := 0; i < n; i++ { + if a[i*lda+kd] == 0 { + return false + } + } + } + } + + // Solve A * X = B or Aᵀ * X = B. + bi := blas64.Implementation() + for j := 0; j < nrhs; j++ { + bi.Dtbsv(uplo, trans, diag, n, kd, a, lda, b[j:], ldb) + } + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dtgsja.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dtgsja.go new file mode 100644 index 0000000000..b3f0208a35 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dtgsja.go @@ -0,0 +1,389 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dtgsja computes the generalized singular value decomposition (GSVD) +// of two real upper triangular or trapezoidal matrices A and B. +// +// A and B have the following forms, which may be obtained by the +// preprocessing subroutine Dggsvp from a general m×n matrix A and p×n +// matrix B: +// +// n-k-l k l +// A = k [ 0 A12 A13 ] if m-k-l >= 0; +// l [ 0 0 A23 ] +// m-k-l [ 0 0 0 ] +// +// n-k-l k l +// A = k [ 0 A12 A13 ] if m-k-l < 0; +// m-k [ 0 0 A23 ] +// +// n-k-l k l +// B = l [ 0 0 B13 ] +// p-l [ 0 0 0 ] +// +// where the k×k matrix A12 and l×l matrix B13 are non-singular +// upper triangular. A23 is l×l upper triangular if m-k-l >= 0, +// otherwise A23 is (m-k)×l upper trapezoidal. +// +// On exit, +// +// Uᵀ*A*Q = D1*[ 0 R ], Vᵀ*B*Q = D2*[ 0 R ], +// +// where U, V and Q are orthogonal matrices. +// R is a non-singular upper triangular matrix, and D1 and D2 are +// diagonal matrices, which are of the following structures: +// +// If m-k-l >= 0, +// +// k l +// D1 = k [ I 0 ] +// l [ 0 C ] +// m-k-l [ 0 0 ] +// +// k l +// D2 = l [ 0 S ] +// p-l [ 0 0 ] +// +// n-k-l k l +// [ 0 R ] = k [ 0 R11 R12 ] k +// l [ 0 0 R22 ] l +// +// where +// +// C = diag( alpha_k, ... , alpha_{k+l} ), +// S = diag( beta_k, ... , beta_{k+l} ), +// C^2 + S^2 = I. +// +// R is stored in +// +// A[0:k+l, n-k-l:n] +// +// on exit. 
+// +// If m-k-l < 0, +// +// k m-k k+l-m +// D1 = k [ I 0 0 ] +// m-k [ 0 C 0 ] +// +// k m-k k+l-m +// D2 = m-k [ 0 S 0 ] +// k+l-m [ 0 0 I ] +// p-l [ 0 0 0 ] +// +// n-k-l k m-k k+l-m +// [ 0 R ] = k [ 0 R11 R12 R13 ] +// m-k [ 0 0 R22 R23 ] +// k+l-m [ 0 0 0 R33 ] +// +// where +// +// C = diag( alpha_k, ... , alpha_m ), +// S = diag( beta_k, ... , beta_m ), +// C^2 + S^2 = I. +// +// R = [ R11 R12 R13 ] is stored in A[0:m, n-k-l:n] +// [ 0 R22 R23 ] +// +// and R33 is stored in +// +// B[m-k:l, n+m-k-l:n] on exit. +// +// The computation of the orthogonal transformation matrices U, V or Q +// is optional. These matrices may either be formed explicitly, or they +// may be post-multiplied into input matrices U1, V1, or Q1. +// +// Dtgsja essentially uses a variant of Kogbetliantz algorithm to reduce +// min(l,m-k)×l triangular or trapezoidal matrix A23 and l×l +// matrix B13 to the form: +// +// U1ᵀ*A13*Q1 = C1*R1; V1ᵀ*B13*Q1 = S1*R1, +// +// where U1, V1 and Q1 are orthogonal matrices. C1 and S1 are diagonal +// matrices satisfying +// +// C1^2 + S1^2 = I, +// +// and R1 is an l×l non-singular upper triangular matrix. +// +// jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior +// is as follows +// +// jobU == lapack.GSVDU Compute orthogonal matrix U +// jobU == lapack.GSVDUnit Use unit-initialized matrix +// jobU == lapack.GSVDNone Do not compute orthogonal matrix. +// +// The behavior is the same for jobV and jobQ with the exception that instead of +// lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively. +// The matrices U, V and Q must be m×m, p×p and n×n respectively unless the +// relevant job parameter is lapack.GSVDNone. +// +// k and l specify the sub-blocks in the input matrices A and B: +// +// A23 = A[k:min(k+l,m), n-l:n) and B13 = B[0:l, n-l:n] +// +// of A and B, whose GSVD is going to be computed by Dtgsja. +// +// tola and tolb are the convergence criteria for the Jacobi-Kogbetliantz +// iteration procedure. Generally, they are the same as used in the preprocessing +// step, for example, +// +// tola = max(m, n)*norm(A)*eps, +// tolb = max(p, n)*norm(B)*eps, +// +// where eps is the machine epsilon. +// +// work must have length at least 2*n, otherwise Dtgsja will panic. +// +// alpha and beta must have length n or Dtgsja will panic. On exit, alpha and +// beta contain the generalized singular value pairs of A and B +// +// alpha[0:k] = 1, +// beta[0:k] = 0, +// +// if m-k-l >= 0, +// +// alpha[k:k+l] = diag(C), +// beta[k:k+l] = diag(S), +// +// if m-k-l < 0, +// +// alpha[k:m]= C, alpha[m:k+l]= 0 +// beta[k:m] = S, beta[m:k+l] = 1. +// +// if k+l < n, +// +// alpha[k+l:n] = 0 and +// beta[k+l:n] = 0. +// +// On exit, A[n-k:n, 0:min(k+l,m)] contains the triangular matrix R or part of R +// and if necessary, B[m-k:l, n+m-k-l:n] contains a part of R. +// +// Dtgsja returns whether the routine converged and the number of iteration cycles +// that were run. +// +// Dtgsja is an internal routine. It is exported for testing purposes. 
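+//
+// An illustrative call sequence (a sketch only; it assumes m, p, n, k and l
+// come from a preceding Dggsvp-style preprocessing step and that all slices
+// are sized as described above):
+//
+// work := make([]float64, 2*n)
+// cycles, ok := impl.Dtgsja(lapack.GSVDUnit, lapack.GSVDUnit, lapack.GSVDUnit,
+// m, p, n, k, l, a, lda, b, ldb, tola, tolb,
+// alpha, beta, u, ldu, v, ldv, q, ldq, work)
+// if ok {
+// // The generalized singular value pairs are (alpha[i], beta[i]); for
+// // i in [k, min(k+l, m)) with beta[i] != 0 the generalized singular
+// // values are alpha[i]/beta[i].
+// }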
+func (impl Implementation) Dtgsja(jobU, jobV, jobQ lapack.GSVDJob, m, p, n, k, l int, a []float64, lda int, b []float64, ldb int, tola, tolb float64, alpha, beta, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, work []float64) (cycles int, ok bool) { + const maxit = 40 + + initu := jobU == lapack.GSVDUnit + wantu := initu || jobU == lapack.GSVDU + + initv := jobV == lapack.GSVDUnit + wantv := initv || jobV == lapack.GSVDV + + initq := jobQ == lapack.GSVDUnit + wantq := initq || jobQ == lapack.GSVDQ + + switch { + case !initu && !wantu && jobU != lapack.GSVDNone: + panic(badGSVDJob + "U") + case !initv && !wantv && jobV != lapack.GSVDNone: + panic(badGSVDJob + "V") + case !initq && !wantq && jobQ != lapack.GSVDNone: + panic(badGSVDJob + "Q") + case m < 0: + panic(mLT0) + case p < 0: + panic(pLT0) + case n < 0: + panic(nLT0) + + case lda < max(1, n): + panic(badLdA) + case len(a) < (m-1)*lda+n: + panic(shortA) + + case ldb < max(1, n): + panic(badLdB) + case len(b) < (p-1)*ldb+n: + panic(shortB) + + case len(alpha) != n: + panic(badLenAlpha) + case len(beta) != n: + panic(badLenBeta) + + case ldu < 1, wantu && ldu < m: + panic(badLdU) + case wantu && len(u) < (m-1)*ldu+m: + panic(shortU) + + case ldv < 1, wantv && ldv < p: + panic(badLdV) + case wantv && len(v) < (p-1)*ldv+p: + panic(shortV) + + case ldq < 1, wantq && ldq < n: + panic(badLdQ) + case wantq && len(q) < (n-1)*ldq+n: + panic(shortQ) + + case len(work) < 2*n: + panic(shortWork) + } + + // Initialize U, V and Q, if necessary + if initu { + impl.Dlaset(blas.All, m, m, 0, 1, u, ldu) + } + if initv { + impl.Dlaset(blas.All, p, p, 0, 1, v, ldv) + } + if initq { + impl.Dlaset(blas.All, n, n, 0, 1, q, ldq) + } + + bi := blas64.Implementation() + minTol := math.Min(tola, tolb) + + // Loop until convergence. + upper := false + for cycles = 1; cycles <= maxit; cycles++ { + upper = !upper + + for i := 0; i < l-1; i++ { + for j := i + 1; j < l; j++ { + var a1, a2, a3 float64 + if k+i < m { + a1 = a[(k+i)*lda+n-l+i] + } + if k+j < m { + a3 = a[(k+j)*lda+n-l+j] + } + + b1 := b[i*ldb+n-l+i] + b3 := b[j*ldb+n-l+j] + + var b2 float64 + if upper { + if k+i < m { + a2 = a[(k+i)*lda+n-l+j] + } + b2 = b[i*ldb+n-l+j] + } else { + if k+j < m { + a2 = a[(k+j)*lda+n-l+i] + } + b2 = b[j*ldb+n-l+i] + } + + csu, snu, csv, snv, csq, snq := impl.Dlags2(upper, a1, a2, a3, b1, b2, b3) + + // Update (k+i)-th and (k+j)-th rows of matrix A: Uᵀ*A. + if k+j < m { + bi.Drot(l, a[(k+j)*lda+n-l:], 1, a[(k+i)*lda+n-l:], 1, csu, snu) + } + + // Update i-th and j-th rows of matrix B: Vᵀ*B. + bi.Drot(l, b[j*ldb+n-l:], 1, b[i*ldb+n-l:], 1, csv, snv) + + // Update (n-l+i)-th and (n-l+j)-th columns of matrices + // A and B: A*Q and B*Q. + bi.Drot(min(k+l, m), a[n-l+j:], lda, a[n-l+i:], lda, csq, snq) + bi.Drot(l, b[n-l+j:], ldb, b[n-l+i:], ldb, csq, snq) + + if upper { + if k+i < m { + a[(k+i)*lda+n-l+j] = 0 + } + b[i*ldb+n-l+j] = 0 + } else { + if k+j < m { + a[(k+j)*lda+n-l+i] = 0 + } + b[j*ldb+n-l+i] = 0 + } + + // Update orthogonal matrices U, V, Q, if desired. + if wantu && k+j < m { + bi.Drot(m, u[k+j:], ldu, u[k+i:], ldu, csu, snu) + } + if wantv { + bi.Drot(p, v[j:], ldv, v[i:], ldv, csv, snv) + } + if wantq { + bi.Drot(n, q[n-l+j:], ldq, q[n-l+i:], ldq, csq, snq) + } + } + } + + if !upper { + // The matrices A13 and B13 were lower triangular at the start + // of the cycle, and are now upper triangular. + // + // Convergence test: test the parallelism of the corresponding + // rows of A and B. 
+ var error float64
+ for i := 0; i < min(l, m-k); i++ {
+ bi.Dcopy(l-i, a[(k+i)*lda+n-l+i:], 1, work, 1)
+ bi.Dcopy(l-i, b[i*ldb+n-l+i:], 1, work[l:], 1)
+ ssmin := impl.Dlapll(l-i, work, 1, work[l:], 1)
+ error = math.Max(error, ssmin)
+ }
+ if error <= minTol {
+ // The algorithm has converged.
+ // Compute the generalized singular value pairs (alpha, beta)
+ // and set the triangular matrix R to array A.
+ for i := 0; i < k; i++ {
+ alpha[i] = 1
+ beta[i] = 0
+ }
+
+ for i := 0; i < min(l, m-k); i++ {
+ a1 := a[(k+i)*lda+n-l+i]
+ b1 := b[i*ldb+n-l+i]
+ if a1 != 0 {
+ gamma := b1 / a1
+ // Change sign if necessary.
+ if gamma < 0 {
+ bi.Dscal(l-i, -1, b[i*ldb+n-l+i:], 1)
+ if wantv {
+ bi.Dscal(p, -1, v[i:], ldv)
+ }
+ }
+ beta[k+i], alpha[k+i], _ = impl.Dlartg(math.Abs(gamma), 1)
+
+ if alpha[k+i] >= beta[k+i] {
+ bi.Dscal(l-i, 1/alpha[k+i], a[(k+i)*lda+n-l+i:], 1)
+ } else {
+ bi.Dscal(l-i, 1/beta[k+i], b[i*ldb+n-l+i:], 1)
+ bi.Dcopy(l-i, b[i*ldb+n-l+i:], 1, a[(k+i)*lda+n-l+i:], 1)
+ }
+ } else {
+ alpha[k+i] = 0
+ beta[k+i] = 1
+ bi.Dcopy(l-i, b[i*ldb+n-l+i:], 1, a[(k+i)*lda+n-l+i:], 1)
+ }
+ }
+
+ for i := m; i < k+l; i++ {
+ alpha[i] = 0
+ beta[i] = 1
+ }
+ if k+l < n {
+ for i := k + l; i < n; i++ {
+ alpha[i] = 0
+ beta[i] = 0
+ }
+ }
+
+ return cycles, true
+ }
+ }
+ }
+
+ // The algorithm has not converged after maxit cycles.
+ return cycles, false
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dtrcon.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrcon.go
new file mode 100644
index 0000000000..899c95dd58
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrcon.go
@@ -0,0 +1,90 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+ "math"
+
+ "gonum.org/v1/gonum/blas"
+ "gonum.org/v1/gonum/blas/blas64"
+ "gonum.org/v1/gonum/lapack"
+)
+
+// Dtrcon estimates the reciprocal of the condition number of a triangular matrix A.
+// The condition number computed may be based on the 1-norm or the ∞-norm.
+//
+// work is a temporary data slice of length at least 3*n and Dtrcon will panic otherwise.
+//
+// iwork is a temporary data slice of length at least n and Dtrcon will panic otherwise.
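+//
+// A minimal calling sketch (assuming a holds an n×n non-unit upper triangular
+// matrix in row-major layout with stride lda):
+//
+// work := make([]float64, 3*n)
+// iwork := make([]int, n)
+// rcond := impl.Dtrcon(lapack.MaxRowSum, blas.Upper, blas.NonUnit, n, a, lda, work, iwork)
+//
+// rcond is near 0 for an ill-conditioned A and near 1 for a well-conditioned A.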
+func (impl Implementation) Dtrcon(norm lapack.MatrixNorm, uplo blas.Uplo, diag blas.Diag, n int, a []float64, lda int, work []float64, iwork []int) float64 { + switch { + case norm != lapack.MaxColumnSum && norm != lapack.MaxRowSum: + panic(badNorm) + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case diag != blas.NonUnit && diag != blas.Unit: + panic(badDiag) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + if n == 0 { + return 1 + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(work) < 3*n: + panic(shortWork) + case len(iwork) < n: + panic(shortIWork) + } + + bi := blas64.Implementation() + + var rcond float64 + smlnum := dlamchS * float64(n) + + anorm := impl.Dlantr(norm, uplo, diag, n, n, a, lda, work) + + if anorm <= 0 { + return rcond + } + var ainvnm float64 + var normin bool + kase1 := 2 + if norm == lapack.MaxColumnSum { + kase1 = 1 + } + var kase int + isave := new([3]int) + var scale float64 + for { + ainvnm, kase = impl.Dlacn2(n, work[n:], work, iwork, ainvnm, kase, isave) + if kase == 0 { + if ainvnm != 0 { + rcond = (1 / anorm) / ainvnm + } + return rcond + } + if kase == kase1 { + scale = impl.Dlatrs(uplo, blas.NoTrans, diag, normin, n, a, lda, work, work[2*n:]) + } else { + scale = impl.Dlatrs(uplo, blas.Trans, diag, normin, n, a, lda, work, work[2*n:]) + } + normin = true + if scale != 1 { + ix := bi.Idamax(n, work, 1) + xnorm := math.Abs(work[ix]) + if scale == 0 || scale < xnorm*smlnum { + return rcond + } + impl.Drscl(n, scale, work, 1) + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dtrevc3.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrevc3.go new file mode 100644 index 0000000000..86197d3af5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrevc3.go @@ -0,0 +1,894 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dtrevc3 computes some or all of the right and/or left eigenvectors of an n×n +// upper quasi-triangular matrix T in Schur canonical form. Matrices of this +// type are produced by the Schur factorization of a real general matrix A +// +// A = Q T Qᵀ, +// +// as computed by Dhseqr. +// +// The right eigenvector x of T corresponding to an +// eigenvalue λ is defined by +// +// T x = λ x, +// +// and the left eigenvector y is defined by +// +// yᵀ T = λ yᵀ. +// +// The eigenvalues are read directly from the diagonal blocks of T. +// +// This routine returns the matrices X and/or Y of right and left eigenvectors +// of T, or the products Q*X and/or Q*Y, where Q is an input matrix. If Q is the +// orthogonal factor that reduces a matrix A to Schur form T, then Q*X and Q*Y +// are the matrices of right and left eigenvectors of A. +// +// If side == lapack.EVRight, only right eigenvectors will be computed. +// If side == lapack.EVLeft, only left eigenvectors will be computed. +// If side == lapack.EVBoth, both right and left eigenvectors will be computed. +// For other values of side, Dtrevc3 will panic. +// +// If howmny == lapack.EVAll, all right and/or left eigenvectors will be +// computed. +// If howmny == lapack.EVAllMulQ, all right and/or left eigenvectors will be +// computed and multiplied from left by the matrices in VR and/or VL. 
+// If howmny == lapack.EVSelected, right and/or left eigenvectors will be
+// computed as indicated by selected.
+// For other values of howmny, Dtrevc3 will panic.
+//
+// selected specifies which eigenvectors will be computed. It must have length n
+// if howmny == lapack.EVSelected, and it is not referenced otherwise.
+// If w_j is a real eigenvalue, the corresponding real eigenvector will be
+// computed if selected[j] is true.
+// If w_j and w_{j+1} are the real and imaginary parts of a complex eigenvalue,
+// the corresponding complex eigenvector is computed if either selected[j] or
+// selected[j+1] is true, and on return selected[j] will be set to true and
+// selected[j+1] will be set to false.
+//
+// VL and VR are n×mm matrices. If howmny is lapack.EVAll or
+// lapack.EVAllMulQ, mm must be at least n. If howmny is
+// lapack.EVSelected, mm must be large enough to store the selected
+// eigenvectors. Each selected real eigenvector occupies one column and each
+// selected complex eigenvector occupies two columns. If mm is not sufficiently
+// large, Dtrevc3 will panic.
+//
+// On entry, if howmny is lapack.EVAllMulQ, it is assumed that VL (if side
+// is lapack.EVLeft or lapack.EVBoth) contains an n×n matrix QL,
+// and that VR (if side is lapack.EVRight or lapack.EVBoth) contains
+// an n×n matrix QR. QL and QR are typically the orthogonal matrix Q of Schur
+// vectors returned by Dhseqr.
+//
+// On return, if side is lapack.EVLeft or lapack.EVBoth,
+// VL will contain:
+//
+// if howmny == lapack.EVAll, the matrix Y of left eigenvectors of T,
+// if howmny == lapack.EVAllMulQ, the matrix Q*Y,
+// if howmny == lapack.EVSelected, the left eigenvectors of T specified by
+// selected, stored consecutively in the
+// columns of VL, in the same order as their
+// eigenvalues.
+//
+// VL is not referenced if side == lapack.EVRight.
+//
+// On return, if side is lapack.EVRight or lapack.EVBoth,
+// VR will contain:
+//
+// if howmny == lapack.EVAll, the matrix X of right eigenvectors of T,
+// if howmny == lapack.EVAllMulQ, the matrix Q*X,
+// if howmny == lapack.EVSelected, the right eigenvectors of T specified by
+// selected, stored consecutively in the
+// columns of VR, in the same order as their
+// eigenvalues.
+//
+// VR is not referenced if side == lapack.EVLeft.
+//
+// Complex eigenvectors corresponding to a complex eigenvalue are stored in VL
+// and VR in two consecutive columns, the first holding the real part, and the
+// second the imaginary part.
+//
+// Each eigenvector will be normalized so that the element of largest magnitude
+// has magnitude 1. Here the magnitude of a complex number (x,y) is taken to be
+// |x| + |y|.
+//
+// work must have length at least lwork and lwork must be at least max(1,3*n),
+// otherwise Dtrevc3 will panic. For optimum performance, lwork should be at
+// least n+2*n*nb, where nb is the optimal blocksize.
+//
+// If lwork == -1, instead of performing Dtrevc3, the function only estimates
+// the optimal workspace size based on n and stores it into work[0].
+//
+// Dtrevc3 returns the number of columns in VL and/or VR actually used to store
+// the eigenvectors.
+//
+// Dtrevc3 is an internal routine. It is exported for testing purposes.
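+//
+// A typical call sequence (a sketch; t, ldt, vr and ldvr are assumed to come
+// from a preceding Dhseqr call, with vr holding the matrix Q of Schur vectors)
+// first queries the optimal workspace size and then computes all right
+// eigenvectors of the original matrix:
+//
+// var query [1]float64
+// impl.Dtrevc3(lapack.EVRight, lapack.EVAllMulQ, nil, n, t, ldt, nil, 1, vr, ldvr, n, query[:], -1)
+// work := make([]float64, int(query[0]))
+// impl.Dtrevc3(lapack.EVRight, lapack.EVAllMulQ, nil, n, t, ldt, nil, 1, vr, ldvr, n, work, len(work))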
+func (impl Implementation) Dtrevc3(side lapack.EVSide, howmny lapack.EVHowMany, selected []bool, n int, t []float64, ldt int, vl []float64, ldvl int, vr []float64, ldvr int, mm int, work []float64, lwork int) (m int) { + bothv := side == lapack.EVBoth + rightv := side == lapack.EVRight || bothv + leftv := side == lapack.EVLeft || bothv + switch { + case !rightv && !leftv: + panic(badEVSide) + case howmny != lapack.EVAll && howmny != lapack.EVAllMulQ && howmny != lapack.EVSelected: + panic(badEVHowMany) + case n < 0: + panic(nLT0) + case ldt < max(1, n): + panic(badLdT) + case mm < 0: + panic(mmLT0) + case ldvl < 1: + // ldvl and ldvr are also checked below after the computation of + // m (number of columns of VL and VR) in case of howmny == EVSelected. + panic(badLdVL) + case ldvr < 1: + panic(badLdVR) + case lwork < max(1, 3*n) && lwork != -1: + panic(badLWork) + case len(work) < max(1, lwork): + panic(shortWork) + } + + // Quick return if possible. + if n == 0 { + work[0] = 1 + return 0 + } + + // Normally we don't check slice lengths until after the workspace + // query. However, even in case of the workspace query we need to + // compute and return the value of m, and since the computation accesses t, + // we put the length check of t here. + if len(t) < (n-1)*ldt+n { + panic(shortT) + } + + if howmny == lapack.EVSelected { + if len(selected) != n { + panic(badLenSelected) + } + // Set m to the number of columns required to store the selected + // eigenvectors, and standardize the slice selected. + // Each selected real eigenvector occupies one column and each + // selected complex eigenvector occupies two columns. + for j := 0; j < n; { + if j == n-1 || t[(j+1)*ldt+j] == 0 { + // Diagonal 1×1 block corresponding to a + // real eigenvalue. + if selected[j] { + m++ + } + j++ + } else { + // Diagonal 2×2 block corresponding to a + // complex eigenvalue. + if selected[j] || selected[j+1] { + selected[j] = true + selected[j+1] = false + m += 2 + } + j += 2 + } + } + } else { + m = n + } + if mm < m { + panic(badMm) + } + + // Quick return in case of a workspace query. + nb := impl.Ilaenv(1, "DTREVC", string(side)+string(howmny), n, -1, -1, -1) + if lwork == -1 { + work[0] = float64(n + 2*n*nb) + return m + } + + // Quick return if no eigenvectors were selected. + if m == 0 { + return 0 + } + + switch { + case leftv && ldvl < mm: + panic(badLdVL) + case leftv && len(vl) < (n-1)*ldvl+mm: + panic(shortVL) + + case rightv && ldvr < mm: + panic(badLdVR) + case rightv && len(vr) < (n-1)*ldvr+mm: + panic(shortVR) + } + + // Use blocked version of back-transformation if sufficient workspace. + // Zero-out the workspace to avoid potential NaN propagation. + const ( + nbmin = 8 + nbmax = 128 + ) + if howmny == lapack.EVAllMulQ && lwork >= n+2*n*nbmin { + nb = min((lwork-n)/(2*n), nbmax) + impl.Dlaset(blas.All, n, 1+2*nb, 0, 0, work[:n+2*nb*n], 1+2*nb) + } else { + nb = 1 + } + + // Set the constants to control overflow. + ulp := dlamchP + smlnum := float64(n) / ulp * dlamchS + bignum := (1 - ulp) / smlnum + + // Split work into a vector of column norms and an n×2*nb matrix b. + norms := work[:n] + ldb := 2 * nb + b := work[n : n+n*ldb] + + // Compute 1-norm of each column of strictly upper triangular part of T + // to control overflow in triangular solver. + norms[0] = 0 + for j := 1; j < n; j++ { + var cn float64 + for i := 0; i < j; i++ { + cn += math.Abs(t[i*ldt+j]) + } + norms[j] = cn + } + + bi := blas64.Implementation() + + var ( + x [4]float64 + + iv int // Index of column in current block. 
+ is int + + // ip is used below to specify the real or complex eigenvalue: + // ip == 0, real eigenvalue, + // 1, first of conjugate complex pair (wr,wi), + // -1, second of conjugate complex pair (wr,wi). + ip int + iscomplex [nbmax]int // Stores ip for each column in current block. + ) + + if side == lapack.EVLeft { + goto leftev + } + + // Compute right eigenvectors. + + // For complex right vector, iv-1 is for real part and iv for complex + // part. Non-blocked version always uses iv=1, blocked version starts + // with iv=nb-1 and goes down to 0 or 1. + iv = max(2, nb) - 1 + ip = 0 + is = m - 1 + for ki := n - 1; ki >= 0; ki-- { + if ip == -1 { + // Previous iteration (ki+1) was second of + // conjugate pair, so this ki is first of + // conjugate pair. + ip = 1 + continue + } + + if ki == 0 || t[ki*ldt+ki-1] == 0 { + // Last column or zero on sub-diagonal, so this + // ki must be real eigenvalue. + ip = 0 + } else { + // Non-zero on sub-diagonal, so this ki is + // second of conjugate pair. + ip = -1 + } + + if howmny == lapack.EVSelected { + if ip == 0 { + if !selected[ki] { + continue + } + } else if !selected[ki-1] { + continue + } + } + + // Compute the ki-th eigenvalue (wr,wi). + wr := t[ki*ldt+ki] + var wi float64 + if ip != 0 { + wi = math.Sqrt(math.Abs(t[ki*ldt+ki-1])) * math.Sqrt(math.Abs(t[(ki-1)*ldt+ki])) + } + smin := math.Max(ulp*(math.Abs(wr)+math.Abs(wi)), smlnum) + + if ip == 0 { + // Real right eigenvector. + + b[ki*ldb+iv] = 1 + // Form right-hand side. + for k := 0; k < ki; k++ { + b[k*ldb+iv] = -t[k*ldt+ki] + } + // Solve upper quasi-triangular system: + // [ T[0:ki,0:ki] - wr ]*X = scale*b. + for j := ki - 1; j >= 0; { + if j == 0 || t[j*ldt+j-1] == 0 { + // 1×1 diagonal block. + scale, xnorm, _ := impl.Dlaln2(false, 1, 1, smin, 1, t[j*ldt+j:], ldt, + 1, 1, b[j*ldb+iv:], ldb, wr, 0, x[:1], 2) + // Scale X[0,0] to avoid overflow when updating the + // right-hand side. + if xnorm > 1 && norms[j] > bignum/xnorm { + x[0] /= xnorm + scale /= xnorm + } + // Scale if necessary. + if scale != 1 { + bi.Dscal(ki+1, scale, b[iv:], ldb) + } + b[j*ldb+iv] = x[0] + // Update right-hand side. + bi.Daxpy(j, -x[0], t[j:], ldt, b[iv:], ldb) + j-- + } else { + // 2×2 diagonal block. + scale, xnorm, _ := impl.Dlaln2(false, 2, 1, smin, 1, t[(j-1)*ldt+j-1:], ldt, + 1, 1, b[(j-1)*ldb+iv:], ldb, wr, 0, x[:3], 2) + // Scale X[0,0] and X[1,0] to avoid overflow + // when updating the right-hand side. + if xnorm > 1 { + beta := math.Max(norms[j-1], norms[j]) + if beta > bignum/xnorm { + x[0] /= xnorm + x[2] /= xnorm + scale /= xnorm + } + } + // Scale if necessary. + if scale != 1 { + bi.Dscal(ki+1, scale, b[iv:], ldb) + } + b[(j-1)*ldb+iv] = x[0] + b[j*ldb+iv] = x[2] + // Update right-hand side. + bi.Daxpy(j-1, -x[0], t[j-1:], ldt, b[iv:], ldb) + bi.Daxpy(j-1, -x[2], t[j:], ldt, b[iv:], ldb) + j -= 2 + } + } + // Copy the vector x or Q*x to VR and normalize. + switch { + case howmny != lapack.EVAllMulQ: + // No back-transform: copy x to VR and normalize. + bi.Dcopy(ki+1, b[iv:], ldb, vr[is:], ldvr) + ii := bi.Idamax(ki+1, vr[is:], ldvr) + remax := 1 / math.Abs(vr[ii*ldvr+is]) + bi.Dscal(ki+1, remax, vr[is:], ldvr) + for k := ki + 1; k < n; k++ { + vr[k*ldvr+is] = 0 + } + case nb == 1: + // Version 1: back-transform each vector with GEMV, Q*x. 
+ if ki > 0 { + bi.Dgemv(blas.NoTrans, n, ki, 1, vr, ldvr, b[iv:], ldb, + b[ki*ldb+iv], vr[ki:], ldvr) + } + ii := bi.Idamax(n, vr[ki:], ldvr) + remax := 1 / math.Abs(vr[ii*ldvr+ki]) + bi.Dscal(n, remax, vr[ki:], ldvr) + default: + // Version 2: back-transform block of vectors with GEMM. + // Zero out below vector. + for k := ki + 1; k < n; k++ { + b[k*ldb+iv] = 0 + } + iscomplex[iv] = ip + // Back-transform and normalization is done below. + } + } else { + // Complex right eigenvector. + + // Initial solve + // [ ( T[ki-1,ki-1] T[ki-1,ki] ) - (wr + i*wi) ]*X = 0. + // [ ( T[ki, ki-1] T[ki, ki] ) ] + if math.Abs(t[(ki-1)*ldt+ki]) >= math.Abs(t[ki*ldt+ki-1]) { + b[(ki-1)*ldb+iv-1] = 1 + b[ki*ldb+iv] = wi / t[(ki-1)*ldt+ki] + } else { + b[(ki-1)*ldb+iv-1] = -wi / t[ki*ldt+ki-1] + b[ki*ldb+iv] = 1 + } + b[ki*ldb+iv-1] = 0 + b[(ki-1)*ldb+iv] = 0 + // Form right-hand side. + for k := 0; k < ki-1; k++ { + b[k*ldb+iv-1] = -b[(ki-1)*ldb+iv-1] * t[k*ldt+ki-1] + b[k*ldb+iv] = -b[ki*ldb+iv] * t[k*ldt+ki] + } + // Solve upper quasi-triangular system: + // [ T[0:ki-1,0:ki-1] - (wr+i*wi) ]*X = scale*(b1+i*b2) + for j := ki - 2; j >= 0; { + if j == 0 || t[j*ldt+j-1] == 0 { + // 1×1 diagonal block. + + scale, xnorm, _ := impl.Dlaln2(false, 1, 2, smin, 1, t[j*ldt+j:], ldt, + 1, 1, b[j*ldb+iv-1:], ldb, wr, wi, x[:2], 2) + // Scale X[0,0] and X[0,1] to avoid + // overflow when updating the right-hand side. + if xnorm > 1 && norms[j] > bignum/xnorm { + x[0] /= xnorm + x[1] /= xnorm + scale /= xnorm + } + // Scale if necessary. + if scale != 1 { + bi.Dscal(ki+1, scale, b[iv-1:], ldb) + bi.Dscal(ki+1, scale, b[iv:], ldb) + } + b[j*ldb+iv-1] = x[0] + b[j*ldb+iv] = x[1] + // Update the right-hand side. + bi.Daxpy(j, -x[0], t[j:], ldt, b[iv-1:], ldb) + bi.Daxpy(j, -x[1], t[j:], ldt, b[iv:], ldb) + j-- + } else { + // 2×2 diagonal block. + + scale, xnorm, _ := impl.Dlaln2(false, 2, 2, smin, 1, t[(j-1)*ldt+j-1:], ldt, + 1, 1, b[(j-1)*ldb+iv-1:], ldb, wr, wi, x[:], 2) + // Scale X to avoid overflow when updating + // the right-hand side. + if xnorm > 1 { + beta := math.Max(norms[j-1], norms[j]) + if beta > bignum/xnorm { + rec := 1 / xnorm + x[0] *= rec + x[1] *= rec + x[2] *= rec + x[3] *= rec + scale *= rec + } + } + // Scale if necessary. + if scale != 1 { + bi.Dscal(ki+1, scale, b[iv-1:], ldb) + bi.Dscal(ki+1, scale, b[iv:], ldb) + } + b[(j-1)*ldb+iv-1] = x[0] + b[(j-1)*ldb+iv] = x[1] + b[j*ldb+iv-1] = x[2] + b[j*ldb+iv] = x[3] + // Update the right-hand side. + bi.Daxpy(j-1, -x[0], t[j-1:], ldt, b[iv-1:], ldb) + bi.Daxpy(j-1, -x[1], t[j-1:], ldt, b[iv:], ldb) + bi.Daxpy(j-1, -x[2], t[j:], ldt, b[iv-1:], ldb) + bi.Daxpy(j-1, -x[3], t[j:], ldt, b[iv:], ldb) + j -= 2 + } + } + + // Copy the vector x or Q*x to VR and normalize. + switch { + case howmny != lapack.EVAllMulQ: + // No back-transform: copy x to VR and normalize. + bi.Dcopy(ki+1, b[iv-1:], ldb, vr[is-1:], ldvr) + bi.Dcopy(ki+1, b[iv:], ldb, vr[is:], ldvr) + emax := 0.0 + for k := 0; k <= ki; k++ { + emax = math.Max(emax, math.Abs(vr[k*ldvr+is-1])+math.Abs(vr[k*ldvr+is])) + } + remax := 1 / emax + bi.Dscal(ki+1, remax, vr[is-1:], ldvr) + bi.Dscal(ki+1, remax, vr[is:], ldvr) + for k := ki + 1; k < n; k++ { + vr[k*ldvr+is-1] = 0 + vr[k*ldvr+is] = 0 + } + case nb == 1: + // Version 1: back-transform each vector with GEMV, Q*x. 
+ if ki-1 > 0 { + bi.Dgemv(blas.NoTrans, n, ki-1, 1, vr, ldvr, b[iv-1:], ldb, + b[(ki-1)*ldb+iv-1], vr[ki-1:], ldvr) + bi.Dgemv(blas.NoTrans, n, ki-1, 1, vr, ldvr, b[iv:], ldb, + b[ki*ldb+iv], vr[ki:], ldvr) + } else { + bi.Dscal(n, b[(ki-1)*ldb+iv-1], vr[ki-1:], ldvr) + bi.Dscal(n, b[ki*ldb+iv], vr[ki:], ldvr) + } + emax := 0.0 + for k := 0; k < n; k++ { + emax = math.Max(emax, math.Abs(vr[k*ldvr+ki-1])+math.Abs(vr[k*ldvr+ki])) + } + remax := 1 / emax + bi.Dscal(n, remax, vr[ki-1:], ldvr) + bi.Dscal(n, remax, vr[ki:], ldvr) + default: + // Version 2: back-transform block of vectors with GEMM. + // Zero out below vector. + for k := ki + 1; k < n; k++ { + b[k*ldb+iv-1] = 0 + b[k*ldb+iv] = 0 + } + iscomplex[iv-1] = -ip + iscomplex[iv] = ip + iv-- + // Back-transform and normalization is done below. + } + } + if nb > 1 { + // Blocked version of back-transform. + + // For complex case, ki2 includes both vectors (ki-1 and ki). + ki2 := ki + if ip != 0 { + ki2-- + } + // Columns iv:nb of b are valid vectors. + // When the number of vectors stored reaches nb-1 or nb, + // or if this was last vector, do the Gemm. + if iv < 2 || ki2 == 0 { + bi.Dgemm(blas.NoTrans, blas.NoTrans, n, nb-iv, ki2+nb-iv, + 1, vr, ldvr, b[iv:], ldb, + 0, b[nb+iv:], ldb) + // Normalize vectors. + var remax float64 + for k := iv; k < nb; k++ { + if iscomplex[k] == 0 { + // Real eigenvector. + ii := bi.Idamax(n, b[nb+k:], ldb) + remax = 1 / math.Abs(b[ii*ldb+nb+k]) + } else if iscomplex[k] == 1 { + // First eigenvector of conjugate pair. + emax := 0.0 + for ii := 0; ii < n; ii++ { + emax = math.Max(emax, math.Abs(b[ii*ldb+nb+k])+math.Abs(b[ii*ldb+nb+k+1])) + } + remax = 1 / emax + // Second eigenvector of conjugate pair + // will reuse this value of remax. + } + bi.Dscal(n, remax, b[nb+k:], ldb) + } + impl.Dlacpy(blas.All, n, nb-iv, b[nb+iv:], ldb, vr[ki2:], ldvr) + iv = nb - 1 + } else { + iv-- + } + } + is-- + if ip != 0 { + is-- + } + } + + if side == lapack.EVRight { + return m + } + +leftev: + // Compute left eigenvectors. + + // For complex left vector, iv is for real part and iv+1 for complex + // part. Non-blocked version always uses iv=0. Blocked version starts + // with iv=0, goes up to nb-2 or nb-1. + iv = 0 + ip = 0 + is = 0 + for ki := 0; ki < n; ki++ { + if ip == 1 { + // Previous iteration ki-1 was first of conjugate pair, + // so this ki is second of conjugate pair. + ip = -1 + continue + } + + if ki == n-1 || t[(ki+1)*ldt+ki] == 0 { + // Last column or zero on sub-diagonal, so this ki must + // be real eigenvalue. + ip = 0 + } else { + // Non-zero on sub-diagonal, so this ki is first of + // conjugate pair. + ip = 1 + } + if howmny == lapack.EVSelected && !selected[ki] { + continue + } + + // Compute the ki-th eigenvalue (wr,wi). + wr := t[ki*ldt+ki] + var wi float64 + if ip != 0 { + wi = math.Sqrt(math.Abs(t[ki*ldt+ki+1])) * math.Sqrt(math.Abs(t[(ki+1)*ldt+ki])) + } + smin := math.Max(ulp*(math.Abs(wr)+math.Abs(wi)), smlnum) + + if ip == 0 { + // Real left eigenvector. + + b[ki*ldb+iv] = 1 + // Form right-hand side. + for k := ki + 1; k < n; k++ { + b[k*ldb+iv] = -t[ki*ldt+k] + } + // Solve transposed quasi-triangular system: + // [ T[ki+1:n,ki+1:n] - wr ]ᵀ * X = scale*b + vmax := 1.0 + vcrit := bignum + for j := ki + 1; j < n; { + if j == n-1 || t[(j+1)*ldt+j] == 0 { + // 1×1 diagonal block. + + // Scale if necessary to avoid overflow + // when forming the right-hand side. 
+ if norms[j] > vcrit { + rec := 1 / vmax + bi.Dscal(n-ki, rec, b[ki*ldb+iv:], ldb) + vmax = 1 + } + b[j*ldb+iv] -= bi.Ddot(j-ki-1, t[(ki+1)*ldt+j:], ldt, b[(ki+1)*ldb+iv:], ldb) + // Solve [ T[j,j] - wr ]ᵀ * X = b. + scale, _, _ := impl.Dlaln2(false, 1, 1, smin, 1, t[j*ldt+j:], ldt, + 1, 1, b[j*ldb+iv:], ldb, wr, 0, x[:1], 2) + // Scale if necessary. + if scale != 1 { + bi.Dscal(n-ki, scale, b[ki*ldb+iv:], ldb) + } + b[j*ldb+iv] = x[0] + vmax = math.Max(math.Abs(b[j*ldb+iv]), vmax) + vcrit = bignum / vmax + j++ + } else { + // 2×2 diagonal block. + + // Scale if necessary to avoid overflow + // when forming the right-hand side. + beta := math.Max(norms[j], norms[j+1]) + if beta > vcrit { + bi.Dscal(n-ki, 1/vmax, b[ki*ldb+iv:], ldb) + vmax = 1 + } + b[j*ldb+iv] -= bi.Ddot(j-ki-1, t[(ki+1)*ldt+j:], ldt, b[(ki+1)*ldb+iv:], ldb) + b[(j+1)*ldb+iv] -= bi.Ddot(j-ki-1, t[(ki+1)*ldt+j+1:], ldt, b[(ki+1)*ldb+iv:], ldb) + // Solve + // [ T[j,j]-wr T[j,j+1] ]ᵀ * X = scale*[ b1 ] + // [ T[j+1,j] T[j+1,j+1]-wr ] [ b2 ] + scale, _, _ := impl.Dlaln2(true, 2, 1, smin, 1, t[j*ldt+j:], ldt, + 1, 1, b[j*ldb+iv:], ldb, wr, 0, x[:3], 2) + // Scale if necessary. + if scale != 1 { + bi.Dscal(n-ki, scale, b[ki*ldb+iv:], ldb) + } + b[j*ldb+iv] = x[0] + b[(j+1)*ldb+iv] = x[2] + vmax = math.Max(vmax, math.Max(math.Abs(b[j*ldb+iv]), math.Abs(b[(j+1)*ldb+iv]))) + vcrit = bignum / vmax + j += 2 + } + } + // Copy the vector x or Q*x to VL and normalize. + switch { + case howmny != lapack.EVAllMulQ: + // No back-transform: copy x to VL and normalize. + bi.Dcopy(n-ki, b[ki*ldb+iv:], ldb, vl[ki*ldvl+is:], ldvl) + ii := bi.Idamax(n-ki, vl[ki*ldvl+is:], ldvl) + ki + remax := 1 / math.Abs(vl[ii*ldvl+is]) + bi.Dscal(n-ki, remax, vl[ki*ldvl+is:], ldvl) + for k := 0; k < ki; k++ { + vl[k*ldvl+is] = 0 + } + case nb == 1: + // Version 1: back-transform each vector with Gemv, Q*x. + if n-ki-1 > 0 { + bi.Dgemv(blas.NoTrans, n, n-ki-1, + 1, vl[ki+1:], ldvl, b[(ki+1)*ldb+iv:], ldb, + b[ki*ldb+iv], vl[ki:], ldvl) + } + ii := bi.Idamax(n, vl[ki:], ldvl) + remax := 1 / math.Abs(vl[ii*ldvl+ki]) + bi.Dscal(n, remax, vl[ki:], ldvl) + default: + // Version 2: back-transform block of vectors with Gemm + // zero out above vector. + for k := 0; k < ki; k++ { + b[k*ldb+iv] = 0 + } + iscomplex[iv] = ip + // Back-transform and normalization is done below. + } + } else { + // Complex left eigenvector. + + // Initial solve: + // [ [ T[ki,ki] T[ki,ki+1] ]ᵀ - (wr - i* wi) ]*X = 0. + // [ [ T[ki+1,ki] T[ki+1,ki+1] ] ] + if math.Abs(t[ki*ldt+ki+1]) >= math.Abs(t[(ki+1)*ldt+ki]) { + b[ki*ldb+iv] = wi / t[ki*ldt+ki+1] + b[(ki+1)*ldb+iv+1] = 1 + } else { + b[ki*ldb+iv] = 1 + b[(ki+1)*ldb+iv+1] = -wi / t[(ki+1)*ldt+ki] + } + b[(ki+1)*ldb+iv] = 0 + b[ki*ldb+iv+1] = 0 + // Form right-hand side. + for k := ki + 2; k < n; k++ { + b[k*ldb+iv] = -b[ki*ldb+iv] * t[ki*ldt+k] + b[k*ldb+iv+1] = -b[(ki+1)*ldb+iv+1] * t[(ki+1)*ldt+k] + } + // Solve transposed quasi-triangular system: + // [ T[ki+2:n,ki+2:n]ᵀ - (wr-i*wi) ]*X = b1+i*b2 + vmax := 1.0 + vcrit := bignum + for j := ki + 2; j < n; { + if j == n-1 || t[(j+1)*ldt+j] == 0 { + // 1×1 diagonal block. + + // Scale if necessary to avoid overflow + // when forming the right-hand side elements. 
+ if norms[j] > vcrit { + rec := 1 / vmax + bi.Dscal(n-ki, rec, b[ki*ldb+iv:], ldb) + bi.Dscal(n-ki, rec, b[ki*ldb+iv+1:], ldb) + vmax = 1 + } + b[j*ldb+iv] -= bi.Ddot(j-ki-2, t[(ki+2)*ldt+j:], ldt, b[(ki+2)*ldb+iv:], ldb) + b[j*ldb+iv+1] -= bi.Ddot(j-ki-2, t[(ki+2)*ldt+j:], ldt, b[(ki+2)*ldb+iv+1:], ldb) + // Solve [ T[j,j]-(wr-i*wi) ]*(X11+i*X12) = b1+i*b2. + scale, _, _ := impl.Dlaln2(false, 1, 2, smin, 1, t[j*ldt+j:], ldt, + 1, 1, b[j*ldb+iv:], ldb, wr, -wi, x[:2], 2) + // Scale if necessary. + if scale != 1 { + bi.Dscal(n-ki, scale, b[ki*ldb+iv:], ldb) + bi.Dscal(n-ki, scale, b[ki*ldb+iv+1:], ldb) + } + b[j*ldb+iv] = x[0] + b[j*ldb+iv+1] = x[1] + vmax = math.Max(vmax, math.Max(math.Abs(b[j*ldb+iv]), math.Abs(b[j*ldb+iv+1]))) + vcrit = bignum / vmax + j++ + } else { + // 2×2 diagonal block. + + // Scale if necessary to avoid overflow + // when forming the right-hand side elements. + if math.Max(norms[j], norms[j+1]) > vcrit { + rec := 1 / vmax + bi.Dscal(n-ki, rec, b[ki*ldb+iv:], ldb) + bi.Dscal(n-ki, rec, b[ki*ldb+iv+1:], ldb) + vmax = 1 + } + b[j*ldb+iv] -= bi.Ddot(j-ki-2, t[(ki+2)*ldt+j:], ldt, b[(ki+2)*ldb+iv:], ldb) + b[j*ldb+iv+1] -= bi.Ddot(j-ki-2, t[(ki+2)*ldt+j:], ldt, b[(ki+2)*ldb+iv+1:], ldb) + b[(j+1)*ldb+iv] -= bi.Ddot(j-ki-2, t[(ki+2)*ldt+j+1:], ldt, b[(ki+2)*ldb+iv:], ldb) + b[(j+1)*ldb+iv+1] -= bi.Ddot(j-ki-2, t[(ki+2)*ldt+j+1:], ldt, b[(ki+2)*ldb+iv+1:], ldb) + // Solve 2×2 complex linear equation + // [ [T[j,j] T[j,j+1] ]ᵀ - (wr-i*wi)*I ]*X = scale*b + // [ [T[j+1,j] T[j+1,j+1]] ] + scale, _, _ := impl.Dlaln2(true, 2, 2, smin, 1, t[j*ldt+j:], ldt, + 1, 1, b[j*ldb+iv:], ldb, wr, -wi, x[:], 2) + // Scale if necessary. + if scale != 1 { + bi.Dscal(n-ki, scale, b[ki*ldb+iv:], ldb) + bi.Dscal(n-ki, scale, b[ki*ldb+iv+1:], ldb) + } + b[j*ldb+iv] = x[0] + b[j*ldb+iv+1] = x[1] + b[(j+1)*ldb+iv] = x[2] + b[(j+1)*ldb+iv+1] = x[3] + vmax01 := math.Max(math.Abs(x[0]), math.Abs(x[1])) + vmax23 := math.Max(math.Abs(x[2]), math.Abs(x[3])) + vmax = math.Max(vmax, math.Max(vmax01, vmax23)) + vcrit = bignum / vmax + j += 2 + } + } + // Copy the vector x or Q*x to VL and normalize. + switch { + case howmny != lapack.EVAllMulQ: + // No back-transform: copy x to VL and normalize. + bi.Dcopy(n-ki, b[ki*ldb+iv:], ldb, vl[ki*ldvl+is:], ldvl) + bi.Dcopy(n-ki, b[ki*ldb+iv+1:], ldb, vl[ki*ldvl+is+1:], ldvl) + emax := 0.0 + for k := ki; k < n; k++ { + emax = math.Max(emax, math.Abs(vl[k*ldvl+is])+math.Abs(vl[k*ldvl+is+1])) + } + remax := 1 / emax + bi.Dscal(n-ki, remax, vl[ki*ldvl+is:], ldvl) + bi.Dscal(n-ki, remax, vl[ki*ldvl+is+1:], ldvl) + for k := 0; k < ki; k++ { + vl[k*ldvl+is] = 0 + vl[k*ldvl+is+1] = 0 + } + case nb == 1: + // Version 1: back-transform each vector with GEMV, Q*x. + if n-ki-2 > 0 { + bi.Dgemv(blas.NoTrans, n, n-ki-2, + 1, vl[ki+2:], ldvl, b[(ki+2)*ldb+iv:], ldb, + b[ki*ldb+iv], vl[ki:], ldvl) + bi.Dgemv(blas.NoTrans, n, n-ki-2, + 1, vl[ki+2:], ldvl, b[(ki+2)*ldb+iv+1:], ldb, + b[(ki+1)*ldb+iv+1], vl[ki+1:], ldvl) + } else { + bi.Dscal(n, b[ki*ldb+iv], vl[ki:], ldvl) + bi.Dscal(n, b[(ki+1)*ldb+iv+1], vl[ki+1:], ldvl) + } + emax := 0.0 + for k := 0; k < n; k++ { + emax = math.Max(emax, math.Abs(vl[k*ldvl+ki])+math.Abs(vl[k*ldvl+ki+1])) + } + remax := 1 / emax + bi.Dscal(n, remax, vl[ki:], ldvl) + bi.Dscal(n, remax, vl[ki+1:], ldvl) + default: + // Version 2: back-transform block of vectors with GEMM. + // Zero out above vector. + // Could go from ki-nv+1 to ki-1. 
+ for k := 0; k < ki; k++ { + b[k*ldb+iv] = 0 + b[k*ldb+iv+1] = 0 + } + iscomplex[iv] = ip + iscomplex[iv+1] = -ip + iv++ + // Back-transform and normalization is done below. + } + } + if nb > 1 { + // Blocked version of back-transform. + // For complex case, ki2 includes both vectors ki and ki+1. + ki2 := ki + if ip != 0 { + ki2++ + } + // Columns [0:iv] of work are valid vectors. When the + // number of vectors stored reaches nb-1 or nb, or if + // this was last vector, do the Gemm. + if iv >= nb-2 || ki2 == n-1 { + bi.Dgemm(blas.NoTrans, blas.NoTrans, n, iv+1, n-ki2+iv, + 1, vl[ki2-iv:], ldvl, b[(ki2-iv)*ldb:], ldb, + 0, b[nb:], ldb) + // Normalize vectors. + var remax float64 + for k := 0; k <= iv; k++ { + if iscomplex[k] == 0 { + // Real eigenvector. + ii := bi.Idamax(n, b[nb+k:], ldb) + remax = 1 / math.Abs(b[ii*ldb+nb+k]) + } else if iscomplex[k] == 1 { + // First eigenvector of conjugate pair. + emax := 0.0 + for ii := 0; ii < n; ii++ { + emax = math.Max(emax, math.Abs(b[ii*ldb+nb+k])+math.Abs(b[ii*ldb+nb+k+1])) + } + remax = 1 / emax + // Second eigenvector of conjugate pair + // will reuse this value of remax. + } + bi.Dscal(n, remax, b[nb+k:], ldb) + } + impl.Dlacpy(blas.All, n, iv+1, b[nb:], ldb, vl[ki2-iv:], ldvl) + iv = 0 + } else { + iv++ + } + } + is++ + if ip != 0 { + is++ + } + } + + return m +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dtrexc.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrexc.go new file mode 100644 index 0000000000..2a0a5e7c6d --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrexc.go @@ -0,0 +1,230 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "gonum.org/v1/gonum/lapack" + +// Dtrexc reorders the real Schur factorization of a n×n real matrix +// +// A = Q*T*Qᵀ +// +// so that the diagonal block of T with row index ifst is moved to row ilst. +// +// On entry, T must be in Schur canonical form, that is, block upper triangular +// with 1×1 and 2×2 diagonal blocks; each 2×2 diagonal block has its diagonal +// elements equal and its off-diagonal elements of opposite sign. +// +// On return, T will be reordered by an orthogonal similarity transformation Z +// as Zᵀ*T*Z, and will be again in Schur canonical form. +// +// If compq is lapack.UpdateSchur, on return the matrix Q of Schur vectors will be +// updated by post-multiplying it with Z. +// If compq is lapack.UpdateSchurNone, the matrix Q is not referenced and will not be +// updated. +// For other values of compq Dtrexc will panic. +// +// ifst and ilst specify the reordering of the diagonal blocks of T. The block +// with row index ifst is moved to row ilst, by a sequence of transpositions +// between adjacent blocks. +// +// If ifst points to the second row of a 2×2 block, ifstOut will point to the +// first row, otherwise it will be equal to ifst. +// +// ilstOut will point to the first row of the block in its final position. If ok +// is true, ilstOut may differ from ilst by +1 or -1. +// +// It must hold that +// +// 0 <= ifst < n, and 0 <= ilst < n, +// +// otherwise Dtrexc will panic. +// +// If ok is false, two adjacent blocks were too close to swap because the +// problem is very ill-conditioned. T may have been partially reordered, and +// ilstOut will point to the first row of the block at the position to which it +// has been moved. +// +// work must have length at least n, otherwise Dtrexc will panic. 
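+//
+// For example (a sketch assuming t and q hold the factors of a prior real
+// Schur decomposition), the block starting at row 0 can be moved to the
+// bottom of T, accumulating the swaps into q:
+//
+// work := make([]float64, n)
+// ifstOut, ilstOut, ok := impl.Dtrexc(lapack.UpdateSchur, n, t, ldt, q, ldq, 0, n-1, work)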
+// +// Dtrexc is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dtrexc(compq lapack.UpdateSchurComp, n int, t []float64, ldt int, q []float64, ldq int, ifst, ilst int, work []float64) (ifstOut, ilstOut int, ok bool) { + switch { + case compq != lapack.UpdateSchur && compq != lapack.UpdateSchurNone: + panic(badUpdateSchurComp) + case n < 0: + panic(nLT0) + case ldt < max(1, n): + panic(badLdT) + case ldq < 1, compq == lapack.UpdateSchur && ldq < n: + panic(badLdQ) + case (ifst < 0 || n <= ifst) && n > 0: + panic(badIfst) + case (ilst < 0 || n <= ilst) && n > 0: + panic(badIlst) + } + + // Quick return if possible. + if n == 0 { + return ifst, ilst, true + } + + switch { + case len(t) < (n-1)*ldt+n: + panic(shortT) + case compq == lapack.UpdateSchur && len(q) < (n-1)*ldq+n: + panic(shortQ) + case len(work) < n: + panic(shortWork) + } + + // Quick return if possible. + if n == 1 { + return ifst, ilst, true + } + + // Determine the first row of specified block + // and find out it is 1×1 or 2×2. + if ifst > 0 && t[ifst*ldt+ifst-1] != 0 { + ifst-- + } + nbf := 1 // Size of the first block. + if ifst+1 < n && t[(ifst+1)*ldt+ifst] != 0 { + nbf = 2 + } + // Determine the first row of the final block + // and find out it is 1×1 or 2×2. + if ilst > 0 && t[ilst*ldt+ilst-1] != 0 { + ilst-- + } + nbl := 1 // Size of the last block. + if ilst+1 < n && t[(ilst+1)*ldt+ilst] != 0 { + nbl = 2 + } + + ok = true + wantq := compq == lapack.UpdateSchur + + switch { + case ifst == ilst: + return ifst, ilst, true + + case ifst < ilst: + // Update ilst. + switch { + case nbf == 2 && nbl == 1: + ilst-- + case nbf == 1 && nbl == 2: + ilst++ + } + here := ifst + for here < ilst { + // Swap block with next one below. + if nbf == 1 || nbf == 2 { + // Current block either 1×1 or 2×2. + nbnext := 1 // Size of the next block. + if here+nbf+1 < n && t[(here+nbf+1)*ldt+here+nbf] != 0 { + nbnext = 2 + } + ok = impl.Dlaexc(wantq, n, t, ldt, q, ldq, here, nbf, nbnext, work) + if !ok { + return ifst, here, false + } + here += nbnext + // Test if 2×2 block breaks into two 1×1 blocks. + if nbf == 2 && t[(here+1)*ldt+here] == 0 { + nbf = 3 + } + continue + } + + // Current block consists of two 1×1 blocks each of + // which must be swapped individually. + nbnext := 1 // Size of the next block. + if here+3 < n && t[(here+3)*ldt+here+2] != 0 { + nbnext = 2 + } + ok = impl.Dlaexc(wantq, n, t, ldt, q, ldq, here+1, 1, nbnext, work) + if !ok { + return ifst, here, false + } + if nbnext == 1 { + // Swap two 1×1 blocks, no problems possible. + impl.Dlaexc(wantq, n, t, ldt, q, ldq, here, 1, nbnext, work) + here++ + continue + } + // Recompute nbnext in case 2×2 split. + if t[(here+2)*ldt+here+1] == 0 { + nbnext = 1 + } + if nbnext == 2 { + // 2×2 block did not split. + ok = impl.Dlaexc(wantq, n, t, ldt, q, ldq, here, 1, nbnext, work) + if !ok { + return ifst, here, false + } + } else { + // 2×2 block did split. + impl.Dlaexc(wantq, n, t, ldt, q, ldq, here, 1, 1, work) + impl.Dlaexc(wantq, n, t, ldt, q, ldq, here+1, 1, 1, work) + } + here += 2 + } + return ifst, here, true + + default: // ifst > ilst + here := ifst + for here > ilst { + // Swap block with next one above. + nbnext := 1 + if here >= 2 && t[(here-1)*ldt+here-2] != 0 { + nbnext = 2 + } + if nbf == 1 || nbf == 2 { + // Current block either 1×1 or 2×2. + ok = impl.Dlaexc(wantq, n, t, ldt, q, ldq, here-nbnext, nbnext, nbf, work) + if !ok { + return ifst, here, false + } + here -= nbnext + // Test if 2×2 block breaks into two 1×1 blocks. 
+ if nbf == 2 && t[(here+1)*ldt+here] == 0 { + nbf = 3 + } + continue + } + + // Current block consists of two 1×1 blocks each of + // which must be swapped individually. + ok = impl.Dlaexc(wantq, n, t, ldt, q, ldq, here-nbnext, nbnext, 1, work) + if !ok { + return ifst, here, false + } + if nbnext == 1 { + // Swap two 1×1 blocks, no problems possible. + impl.Dlaexc(wantq, n, t, ldt, q, ldq, here, nbnext, 1, work) + here-- + continue + } + // Recompute nbnext in case 2×2 split. + if t[here*ldt+here-1] == 0 { + nbnext = 1 + } + if nbnext == 2 { + // 2×2 block did not split. + ok = impl.Dlaexc(wantq, n, t, ldt, q, ldq, here-1, 2, 1, work) + if !ok { + return ifst, here, false + } + } else { + // 2×2 block did split. + impl.Dlaexc(wantq, n, t, ldt, q, ldq, here, 1, 1, work) + impl.Dlaexc(wantq, n, t, ldt, q, ldq, here-1, 1, 1, work) + } + here -= 2 + } + return ifst, here, true + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dtrti2.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrti2.go new file mode 100644 index 0000000000..efc24b65ea --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrti2.go @@ -0,0 +1,69 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dtrti2 computes the inverse of a triangular matrix, storing the result in place +// into a. This is the BLAS level 2 version of the algorithm. +// +// Dtrti2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dtrti2(uplo blas.Uplo, diag blas.Diag, n int, a []float64, lda int) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case diag != blas.NonUnit && diag != blas.Unit: + panic(badDiag) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + if n == 0 { + return + } + + if len(a) < (n-1)*lda+n { + panic(shortA) + } + + bi := blas64.Implementation() + + nonUnit := diag == blas.NonUnit + // TODO(btracey): Replace this with a row-major ordering. + if uplo == blas.Upper { + for j := 0; j < n; j++ { + var ajj float64 + if nonUnit { + ajj = 1 / a[j*lda+j] + a[j*lda+j] = ajj + ajj *= -1 + } else { + ajj = -1 + } + bi.Dtrmv(blas.Upper, blas.NoTrans, diag, j, a, lda, a[j:], lda) + bi.Dscal(j, ajj, a[j:], lda) + } + return + } + for j := n - 1; j >= 0; j-- { + var ajj float64 + if nonUnit { + ajj = 1 / a[j*lda+j] + a[j*lda+j] = ajj + ajj *= -1 + } else { + ajj = -1 + } + if j < n-1 { + bi.Dtrmv(blas.Lower, blas.NoTrans, diag, n-j-1, a[(j+1)*lda+j+1:], lda, a[(j+1)*lda+j:], lda) + bi.Dscal(n-j-1, ajj, a[(j+1)*lda+j:], lda) + } + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dtrtri.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrtri.go new file mode 100644 index 0000000000..6ec3663c35 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrtri.go @@ -0,0 +1,72 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dtrtri computes the inverse of a triangular matrix, storing the result in place +// into a. This is the BLAS level 3 version of the algorithm which builds upon +// Dtrti2 to operate on matrix blocks instead of only individual columns. 
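+//
+// A hedged usage sketch (illustrative only, not part of the vendored
+// sources; impl is assumed to be an Implementation value): inverting a 3×3
+// non-unit upper triangular matrix stored in row-major order,
+//
+//	a := []float64{
+//		2, 1, 1,
+//		0, 3, 1,
+//		0, 0, 4,
+//	}
+//	ok := impl.Dtrtri(blas.Upper, blas.NonUnit, 3, a, 3)
+//	// On success, a holds the upper triangle of the inverse.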
+// +// Dtrtri will not perform the inversion if the matrix is singular, and returns +// a boolean indicating whether the inversion was successful. +func (impl Implementation) Dtrtri(uplo blas.Uplo, diag blas.Diag, n int, a []float64, lda int) (ok bool) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case diag != blas.NonUnit && diag != blas.Unit: + panic(badDiag) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + if n == 0 { + return true + } + + if len(a) < (n-1)*lda+n { + panic(shortA) + } + + if diag == blas.NonUnit { + for i := 0; i < n; i++ { + if a[i*lda+i] == 0 { + return false + } + } + } + + bi := blas64.Implementation() + + nb := impl.Ilaenv(1, "DTRTRI", "UD", n, -1, -1, -1) + if nb <= 1 || nb > n { + impl.Dtrti2(uplo, diag, n, a, lda) + return true + } + if uplo == blas.Upper { + for j := 0; j < n; j += nb { + jb := min(nb, n-j) + bi.Dtrmm(blas.Left, blas.Upper, blas.NoTrans, diag, j, jb, 1, a, lda, a[j:], lda) + bi.Dtrsm(blas.Right, blas.Upper, blas.NoTrans, diag, j, jb, -1, a[j*lda+j:], lda, a[j:], lda) + impl.Dtrti2(blas.Upper, diag, jb, a[j*lda+j:], lda) + } + return true + } + nn := ((n - 1) / nb) * nb + for j := nn; j >= 0; j -= nb { + jb := min(nb, n-j) + if j+jb <= n-1 { + bi.Dtrmm(blas.Left, blas.Lower, blas.NoTrans, diag, n-j-jb, jb, 1, a[(j+jb)*lda+j+jb:], lda, a[(j+jb)*lda+j:], lda) + bi.Dtrsm(blas.Right, blas.Lower, blas.NoTrans, diag, n-j-jb, jb, -1, a[j*lda+j:], lda, a[(j+jb)*lda+j:], lda) + } + impl.Dtrti2(blas.Lower, diag, jb, a[j*lda+j:], lda) + } + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/dtrtrs.go b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrtrs.go new file mode 100644 index 0000000000..2145fbd5fd --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dtrtrs.go @@ -0,0 +1,55 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +// Dtrtrs solves a triangular system of the form A * X = B or Aᵀ * X = B. Dtrtrs +// returns whether the solve completed successfully. If A is singular, no solve is performed. +func (impl Implementation) Dtrtrs(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n, nrhs int, a []float64, lda int, b []float64, ldb int) (ok bool) { + switch { + case uplo != blas.Upper && uplo != blas.Lower: + panic(badUplo) + case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: + panic(badTrans) + case diag != blas.NonUnit && diag != blas.Unit: + panic(badDiag) + case n < 0: + panic(nLT0) + case nrhs < 0: + panic(nrhsLT0) + case lda < max(1, n): + panic(badLdA) + case ldb < max(1, nrhs): + panic(badLdB) + } + + if n == 0 { + return true + } + + switch { + case len(a) < (n-1)*lda+n: + panic(shortA) + case len(b) < (n-1)*ldb+nrhs: + panic(shortB) + } + + // Check for singularity. + nounit := diag == blas.NonUnit + if nounit { + for i := 0; i < n; i++ { + if a[i*lda+i] == 0 { + return false + } + } + } + bi := blas64.Implementation() + bi.Dtrsm(blas.Left, uplo, trans, diag, n, nrhs, 1, a, lda, b, ldb) + return true +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/errors.go b/vendor/gonum.org/v1/gonum/lapack/gonum/errors.go new file mode 100644 index 0000000000..711cc2d5ad --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/errors.go @@ -0,0 +1,183 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +// This list is duplicated in netlib/lapack/netlib. Keep in sync. +const ( + // Panic strings for bad enumeration values. + badApplyOrtho = "lapack: bad ApplyOrtho" + badBalanceJob = "lapack: bad BalanceJob" + badDiag = "lapack: bad Diag" + badDirect = "lapack: bad Direct" + badEVComp = "lapack: bad EVComp" + badEVHowMany = "lapack: bad EVHowMany" + badEVJob = "lapack: bad EVJob" + badEVSide = "lapack: bad EVSide" + badGSVDJob = "lapack: bad GSVDJob" + badGenOrtho = "lapack: bad GenOrtho" + badLeftEVJob = "lapack: bad LeftEVJob" + badMatrixType = "lapack: bad MatrixType" + badMaximizeNormXJob = "lapack: bad MaximizeNormXJob" + badNorm = "lapack: bad Norm" + badOrthoComp = "lapack: bad OrthoComp" + badPivot = "lapack: bad Pivot" + badRightEVJob = "lapack: bad RightEVJob" + badSVDJob = "lapack: bad SVDJob" + badSchurComp = "lapack: bad SchurComp" + badSchurJob = "lapack: bad SchurJob" + badSide = "lapack: bad Side" + badSort = "lapack: bad Sort" + badStoreV = "lapack: bad StoreV" + badTrans = "lapack: bad Trans" + badUpdateSchurComp = "lapack: bad UpdateSchurComp" + badUplo = "lapack: bad Uplo" + bothSVDOver = "lapack: both jobU and jobVT are lapack.SVDOverwrite" + + // Panic strings for bad numerical and string values. + badIfst = "lapack: ifst out of range" + badIhi = "lapack: ihi out of range" + badIhiz = "lapack: ihiz out of range" + badIlo = "lapack: ilo out of range" + badIloz = "lapack: iloz out of range" + badIlst = "lapack: ilst out of range" + badIsave = "lapack: bad isave value" + badIspec = "lapack: bad ispec value" + badJ1 = "lapack: j1 out of range" + badJpvt = "lapack: bad element of jpvt" + badK1 = "lapack: k1 out of range" + badK2 = "lapack: k2 out of range" + badKacc22 = "lapack: invalid value of kacc22" + badKbot = "lapack: kbot out of range" + badKtop = "lapack: ktop out of range" + badLWork = "lapack: insufficient declared workspace length" + badMm = "lapack: mm out of range" + badN1 = "lapack: bad value of n1" + badN2 = "lapack: bad value of n2" + badNa = "lapack: bad value of na" + badName = "lapack: bad name" + badNh = "lapack: bad value of nh" + badNw = "lapack: bad value of nw" + badPp = "lapack: bad value of pp" + badShifts = "lapack: bad shifts" + i0LT0 = "lapack: i0 < 0" + kGTM = "lapack: k > m" + kGTN = "lapack: k > n" + kLT0 = "lapack: k < 0" + kLT1 = "lapack: k < 1" + kdLT0 = "lapack: kd < 0" + klLT0 = "lapack: kl < 0" + kuLT0 = "lapack: ku < 0" + mGTN = "lapack: m > n" + mLT0 = "lapack: m < 0" + mmLT0 = "lapack: mm < 0" + n0LT0 = "lapack: n0 < 0" + nGTM = "lapack: n > m" + nLT0 = "lapack: n < 0" + nLT1 = "lapack: n < 1" + nLTM = "lapack: n < m" + nanCFrom = "lapack: cfrom is NaN" + nanCTo = "lapack: cto is NaN" + nbGTM = "lapack: nb > m" + nbGTN = "lapack: nb > n" + nbLT0 = "lapack: nb < 0" + nccLT0 = "lapack: ncc < 0" + ncvtLT0 = "lapack: ncvt < 0" + negANorm = "lapack: anorm < 0" + negZ = "lapack: negative z value" + nhLT0 = "lapack: nh < 0" + notIsolated = "lapack: block is not isolated" + nrhsLT0 = "lapack: nrhs < 0" + nruLT0 = "lapack: nru < 0" + nshftsLT0 = "lapack: nshfts < 0" + nshftsOdd = "lapack: nshfts must be even" + nvLT0 = "lapack: nv < 0" + offsetGTM = "lapack: offset > m" + offsetLT0 = "lapack: offset < 0" + pLT0 = "lapack: p < 0" + recurLT0 = "lapack: recur < 0" + zeroCFrom = "lapack: zero cfrom" + + // Panic strings for bad slice lengths. 
+ badLenAlpha = "lapack: bad length of alpha" + badLenBeta = "lapack: bad length of beta" + badLenIpiv = "lapack: bad length of ipiv" + badLenJpiv = "lapack: bad length of jpiv" + badLenJpvt = "lapack: bad length of jpvt" + badLenK = "lapack: bad length of k" + badLenPiv = "lapack: bad length of piv" + badLenSelected = "lapack: bad length of selected" + badLenSi = "lapack: bad length of si" + badLenSr = "lapack: bad length of sr" + badLenTau = "lapack: bad length of tau" + badLenWi = "lapack: bad length of wi" + badLenWr = "lapack: bad length of wr" + + // Panic strings for insufficient slice lengths. + shortA = "lapack: insufficient length of a" + shortAB = "lapack: insufficient length of ab" + shortAuxv = "lapack: insufficient length of auxv" + shortB = "lapack: insufficient length of b" + shortC = "lapack: insufficient length of c" + shortCNorm = "lapack: insufficient length of cnorm" + shortD = "lapack: insufficient length of d" + shortDL = "lapack: insufficient length of dl" + shortDU = "lapack: insufficient length of du" + shortE = "lapack: insufficient length of e" + shortF = "lapack: insufficient length of f" + shortH = "lapack: insufficient length of h" + shortIWork = "lapack: insufficient length of iwork" + shortIsgn = "lapack: insufficient length of isgn" + shortQ = "lapack: insufficient length of q" + shortRHS = "lapack: insufficient length of rhs" + shortS = "lapack: insufficient length of s" + shortScale = "lapack: insufficient length of scale" + shortT = "lapack: insufficient length of t" + shortTau = "lapack: insufficient length of tau" + shortTauP = "lapack: insufficient length of tauP" + shortTauQ = "lapack: insufficient length of tauQ" + shortU = "lapack: insufficient length of u" + shortV = "lapack: insufficient length of v" + shortVL = "lapack: insufficient length of vl" + shortVR = "lapack: insufficient length of vr" + shortVT = "lapack: insufficient length of vt" + shortVn1 = "lapack: insufficient length of vn1" + shortVn2 = "lapack: insufficient length of vn2" + shortW = "lapack: insufficient length of w" + shortWH = "lapack: insufficient length of wh" + shortWV = "lapack: insufficient length of wv" + shortWi = "lapack: insufficient length of wi" + shortWork = "lapack: insufficient length of work" + shortWr = "lapack: insufficient length of wr" + shortX = "lapack: insufficient length of x" + shortY = "lapack: insufficient length of y" + shortZ = "lapack: insufficient length of z" + + // Panic strings for bad leading dimensions of matrices. + badLdA = "lapack: bad leading dimension of A" + badLdB = "lapack: bad leading dimension of B" + badLdC = "lapack: bad leading dimension of C" + badLdF = "lapack: bad leading dimension of F" + badLdH = "lapack: bad leading dimension of H" + badLdQ = "lapack: bad leading dimension of Q" + badLdT = "lapack: bad leading dimension of T" + badLdU = "lapack: bad leading dimension of U" + badLdV = "lapack: bad leading dimension of V" + badLdVL = "lapack: bad leading dimension of VL" + badLdVR = "lapack: bad leading dimension of VR" + badLdVT = "lapack: bad leading dimension of VT" + badLdW = "lapack: bad leading dimension of W" + badLdWH = "lapack: bad leading dimension of WH" + badLdWV = "lapack: bad leading dimension of WV" + badLdWork = "lapack: bad leading dimension of Work" + badLdX = "lapack: bad leading dimension of X" + badLdY = "lapack: bad leading dimension of Y" + badLdZ = "lapack: bad leading dimension of Z" + + // Panic strings for bad vector increments. 
+ absIncNotOne = "lapack: increment not one or negative one" + badIncX = "lapack: incX <= 0" + badIncY = "lapack: incY <= 0" + zeroIncV = "lapack: incv == 0" +) diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/iladlc.go b/vendor/gonum.org/v1/gonum/lapack/gonum/iladlc.go new file mode 100644 index 0000000000..b251d72691 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/iladlc.go @@ -0,0 +1,45 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +// Iladlc scans a matrix for its last non-zero column. Returns -1 if the matrix +// is all zeros. +// +// Iladlc is an internal routine. It is exported for testing purposes. +func (Implementation) Iladlc(m, n int, a []float64, lda int) int { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + if n == 0 || m == 0 { + return -1 + } + + if len(a) < (m-1)*lda+n { + panic(shortA) + } + + // Test common case where corner is non-zero. + if a[n-1] != 0 || a[(m-1)*lda+(n-1)] != 0 { + return n - 1 + } + + // Scan each row tracking the highest column seen. + highest := -1 + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + if a[i*lda+j] != 0 { + highest = max(highest, j) + break + } + } + } + return highest +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/iladlr.go b/vendor/gonum.org/v1/gonum/lapack/gonum/iladlr.go new file mode 100644 index 0000000000..b73fe18ea2 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/iladlr.go @@ -0,0 +1,41 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +// Iladlr scans a matrix for its last non-zero row. Returns -1 if the matrix +// is all zeros. +// +// Iladlr is an internal routine. It is exported for testing purposes. +func (Implementation) Iladlr(m, n int, a []float64, lda int) int { + switch { + case m < 0: + panic(mLT0) + case n < 0: + panic(nLT0) + case lda < max(1, n): + panic(badLdA) + } + + if n == 0 || m == 0 { + return -1 + } + + if len(a) < (m-1)*lda+n { + panic(shortA) + } + + // Check the common case where the corner is non-zero + if a[(m-1)*lda] != 0 || a[(m-1)*lda+n-1] != 0 { + return m - 1 + } + for i := m - 1; i >= 0; i-- { + for j := 0; j < n; j++ { + if a[i*lda+j] != 0 { + return i + } + } + } + return -1 +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/ilaenv.go b/vendor/gonum.org/v1/gonum/lapack/gonum/ilaenv.go new file mode 100644 index 0000000000..fc70806c45 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/ilaenv.go @@ -0,0 +1,395 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +// Ilaenv returns algorithm tuning parameters for the algorithm given by the +// input string. ispec specifies the parameter to return: +// +// 1: The optimal block size for a blocked algorithm. +// 2: The minimum block size for a blocked algorithm. +// 3: The block size of unprocessed data at which a blocked algorithm should +// crossover to an unblocked version. +// 4: The number of shifts. +// 5: The minimum column dimension for blocking to be used. +// 6: The crossover point for SVD (to use QR factorization or not). +// 7: The number of processors. 
+// 8: The crossover point for multi-shift in QR and QZ methods for non-symmetric eigenvalue problems. +// 9: Maximum size of the subproblems in divide-and-conquer algorithms. +// 10: ieee infinity and NaN arithmetic can be trusted not to trap. +// 11: ieee infinity arithmetic can be trusted not to trap. +// 12...16: parameters for Dhseqr and related functions. See Iparmq for more +// information. +// +// Ilaenv is an internal routine. It is exported for testing purposes. +func (impl Implementation) Ilaenv(ispec int, name string, opts string, n1, n2, n3, n4 int) int { + // TODO(btracey): Replace this with a constant lookup? A list of constants? + sname := name[0] == 'S' || name[0] == 'D' + cname := name[0] == 'C' || name[0] == 'Z' + if !sname && !cname { + panic(badName) + } + c2 := name[1:3] + c3 := name[3:6] + c4 := c3[1:3] + + switch ispec { + default: + panic(badIspec) + case 1: + switch c2 { + default: + panic(badName) + case "GE": + switch c3 { + default: + panic(badName) + case "TRF": + if sname { + return 64 + } + return 64 + case "QRF", "RQF", "LQF", "QLF": + if sname { + return 32 + } + return 32 + case "HRD": + if sname { + return 32 + } + return 32 + case "BRD": + if sname { + return 32 + } + return 32 + case "TRI": + if sname { + return 64 + } + return 64 + } + case "PO": + switch c3 { + default: + panic(badName) + case "TRF": + if sname { + return 64 + } + return 64 + } + case "SY": + switch c3 { + default: + panic(badName) + case "TRF": + if sname { + return 64 + } + return 64 + case "TRD": + return 32 + case "GST": + return 64 + } + case "HE": + switch c3 { + default: + panic(badName) + case "TRF": + return 64 + case "TRD": + return 32 + case "GST": + return 64 + } + case "OR": + switch c3[0] { + default: + panic(badName) + case 'G': + switch c3[1:] { + default: + panic(badName) + case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR": + return 32 + } + case 'M': + switch c3[1:] { + default: + panic(badName) + case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR": + return 32 + } + } + case "UN": + switch c3[0] { + default: + panic(badName) + case 'G': + switch c3[1:] { + default: + panic(badName) + case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR": + return 32 + } + case 'M': + switch c3[1:] { + default: + panic(badName) + case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR": + return 32 + } + } + case "GB": + switch c3 { + default: + panic(badName) + case "TRF": + if sname { + if n4 <= 64 { + return 1 + } + return 32 + } + if n4 <= 64 { + return 1 + } + return 32 + } + case "PB": + switch c3 { + default: + panic(badName) + case "TRF": + if sname { + if n2 <= 64 { + return 1 + } + return 32 + } + if n2 <= 64 { + return 1 + } + return 32 + } + case "PT": + switch c3 { + default: + panic(badName) + case "TRS": + return 1 + } + case "TR": + switch c3 { + default: + panic(badName) + case "TRI": + if sname { + return 64 + } + return 64 + case "EVC": + if sname { + return 64 + } + return 64 + } + case "LA": + switch c3 { + default: + panic(badName) + case "UUM": + if sname { + return 64 + } + return 64 + } + case "ST": + if sname && c3 == "EBZ" { + return 1 + } + panic(badName) + } + case 2: + switch c2 { + default: + panic(badName) + case "GE": + switch c3 { + default: + panic(badName) + case "QRF", "RQF", "LQF", "QLF": + if sname { + return 2 + } + return 2 + case "HRD": + if sname { + return 2 + } + return 2 + case "BRD": + if sname { + return 2 + } + return 2 + case "TRI": + if sname { + return 2 + } + return 2 + } + case "SY": + switch c3 { + default: + panic(badName) + case "TRF": + if sname { + 
return 8 + } + return 8 + case "TRD": + if sname { + return 2 + } + panic(badName) + } + case "HE": + if c3 == "TRD" { + return 2 + } + panic(badName) + case "OR": + if !sname { + panic(badName) + } + switch c3[0] { + default: + panic(badName) + case 'G': + switch c4 { + default: + panic(badName) + case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR": + return 2 + } + case 'M': + switch c4 { + default: + panic(badName) + case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR": + return 2 + } + } + case "UN": + switch c3[0] { + default: + panic(badName) + case 'G': + switch c4 { + default: + panic(badName) + case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR": + return 2 + } + case 'M': + switch c4 { + default: + panic(badName) + case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR": + return 2 + } + } + } + case 3: + switch c2 { + default: + panic(badName) + case "GE": + switch c3 { + default: + panic(badName) + case "QRF", "RQF", "LQF", "QLF": + if sname { + return 128 + } + return 128 + case "HRD": + if sname { + return 128 + } + return 128 + case "BRD": + if sname { + return 128 + } + return 128 + } + case "SY": + if sname && c3 == "TRD" { + return 32 + } + panic(badName) + case "HE": + if c3 == "TRD" { + return 32 + } + panic(badName) + case "OR": + switch c3[0] { + default: + panic(badName) + case 'G': + switch c4 { + default: + panic(badName) + case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR": + return 128 + } + } + case "UN": + switch c3[0] { + default: + panic(badName) + case 'G': + switch c4 { + default: + panic(badName) + case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR": + return 128 + } + } + } + case 4: + // Used by xHSEQR + return 6 + case 5: + // Not used + return 2 + case 6: + // Used by xGELSS and xGESVD + return int(float64(min(n1, n2)) * 1.6) + case 7: + // Not used + return 1 + case 8: + // Used by xHSEQR + return 50 + case 9: + // used by xGELSD and xGESDD + return 25 + case 10: + // Go guarantees ieee + return 1 + case 11: + // Go guarantees ieee + return 1 + case 12, 13, 14, 15, 16: + // Dhseqr and related functions for eigenvalue problems. + return impl.Iparmq(ispec, name, opts, n1, n2, n3, n4) + } +} diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/iparmq.go b/vendor/gonum.org/v1/gonum/lapack/gonum/iparmq.go new file mode 100644 index 0000000000..65d105245e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/gonum/iparmq.go @@ -0,0 +1,117 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import "math" + +// Iparmq returns problem and machine dependent parameters useful for Dhseqr and +// related subroutines for eigenvalue problems. +// +// ispec specifies the parameter to return: +// +// 12: Crossover point between Dlahqr and Dlaqr0. Will be at least 11. +// 13: Deflation window size. +// 14: Nibble crossover point. Determines when to skip a multi-shift QR sweep. +// 15: Number of simultaneous shifts in a multishift QR iteration. +// 16: Select structured matrix multiply. +// +// For other values of ispec Iparmq will panic. +// +// name is the name of the calling function. name must be in uppercase but this +// is not checked. +// +// opts is not used and exists for future use. +// +// n is the order of the Hessenberg matrix H. +// +// ilo and ihi specify the block [ilo:ihi+1,ilo:ihi+1] that is being processed. +// +// lwork is the amount of workspace available. +// +// Except for ispec input parameters are not checked. +// +// Iparmq is an internal routine. 
It is exported for testing purposes.
+func (Implementation) Iparmq(ispec int, name, opts string, n, ilo, ihi, lwork int) int {
+	nh := ihi - ilo + 1
+	ns := 2
+	// Set ns, the number of simultaneous shifts. The thresholds must be
+	// checked from the largest down so that the largest matching range
+	// wins, mirroring the sequential IF statements in LAPACK's IPARMQ.
+	switch {
+	case nh >= 6000:
+		ns = 256
+	case nh >= 3000:
+		ns = 128
+	case nh >= 590:
+		ns = 64
+	case nh >= 150:
+		ns = max(10, nh/int(math.Log(float64(nh))/math.Ln2))
+	case nh >= 60:
+		ns = 10
+	case nh >= 30:
+		ns = 4
+	}
+	ns = max(2, ns-(ns%2))
+
+	switch ispec {
+	default:
+		panic(badIspec)
+
+	case 12:
+		// Matrices of order smaller than nmin get sent to Dlahqr, the
+		// classic double shift algorithm. This must be at least 11.
+		const nmin = 75
+		return nmin
+
+	case 13:
+		const knwswp = 500
+		if nh <= knwswp {
+			return ns
+		}
+		return 3 * ns / 2
+
+	case 14:
+		// Skip a computationally expensive multi-shift QR sweep with
+		// Dlaqr5 whenever aggressive early deflation finds at least
+		// nibble*(window size)/100 deflations. The default, small,
+		// value reflects the expectation that the cost of looking
+		// through the deflation window with Dlaqr3 will be
+		// substantially smaller.
+		const nibble = 14
+		return nibble
+
+	case 15:
+		return ns
+
+	case 16:
+		if len(name) != 6 {
+			panic(badName)
+		}
+		const (
+			k22min = 14
+			kacmin = 14
+		)
+		var acc22 int
+		switch {
+		case name[1:] == "GGHRD" || name[1:] == "GGHD3":
+			acc22 = 1
+			if nh >= k22min {
+				acc22 = 2
+			}
+		case name[3:] == "EXC":
+			if nh >= kacmin {
+				acc22 = 1
+			}
+			if nh >= k22min {
+				acc22 = 2
+			}
+		case name[1:] == "HSEQR" || name[1:5] == "LAQR":
+			if ns >= kacmin {
+				acc22 = 1
+			}
+			if ns >= k22min {
+				acc22 = 2
+			}
+		}
+		return acc22
+	}
+}
diff --git a/vendor/gonum.org/v1/gonum/lapack/gonum/lapack.go b/vendor/gonum.org/v1/gonum/lapack/gonum/lapack.go
new file mode 100644
index 0000000000..5daefc584d
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/lapack.go
@@ -0,0 +1,64 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/lapack"
+
+// Implementation is the native Go implementation of LAPACK routines. It
+// is built on top of calls to the return of blas64.Implementation(), so while
+// this code is in pure Go, the underlying BLAS implementation may not be.
+type Implementation struct{}
+
+var _ lapack.Float64 = Implementation{}
+
+func abs(a int) int {
+	if a < 0 {
+		return -a
+	}
+	return a
+}
+
+const (
+	// dlamchE is the machine epsilon. For IEEE this is 2^{-53}.
+	dlamchE = 0x1p-53
+
+	// dlamchB is the radix of the machine (the base of the number system).
+	dlamchB = 2
+
+	// dlamchP is base * eps.
+	dlamchP = dlamchB * dlamchE
+
+	// dlamchS is the "safe minimum", that is, the lowest number such that
+	// 1/dlamchS does not overflow, or also the smallest normal number.
+	// For IEEE this is 2^{-1022}.
+	dlamchS = 0x1p-1022
+
+	// Blue's scaling constants
+	//
+	// An n-vector x is well-scaled if
+	//	dtsml ≤ |xᵢ| ≤ dtbig for 0 ≤ i < n and n ≤ 1/dlamchP,
+	// where
+	//	dtsml = 2^ceil((expmin-1)/2) = 2^ceil((-1021-1)/2) = 2^{-511} = 1.4916681462400413e-154
+	//	dtbig = 2^floor((expmax-digits+1)/2) = 2^floor((1024-53+1)/2) = 2^{486} = 1.997919072202235e+146
+	// If any xᵢ is not well-scaled, then multiplying small values by dssml and
+	// large values by dsbig avoids underflow or overflow when computing the sum
+	// of squares \sum_0^{n-1} (xᵢ)².
+ // dssml = 2^{-floor((expmin-digits)/2)} = 2^{-floor((-1021-53)/2)} = 2^537 = 4.4989137945431964e+161 + // dsbig = 2^{-ceil((expmax+digits-1)/2)} = 2^{-ceil((1024+53-1)/2)} = 2^{-538} = 1.1113793747425387e-162 + // + // References: + // - Anderson E. (2017) + // Algorithm 978: Safe Scaling in the Level 1 BLAS + // ACM Trans Math Softw 44:1--28 + // https://doi.org/10.1145/3061665 + // - Blue, James L. (1978) + // A Portable Fortran Program to Find the Euclidean Norm of a Vector + // ACM Trans Math Softw 4:15--23 + // https://doi.org/10.1145/355769.355771 + dtsml = 0x1p-511 + dtbig = 0x1p486 + dssml = 0x1p537 + dsbig = 0x1p-538 +) diff --git a/vendor/gonum.org/v1/gonum/lapack/lapack.go b/vendor/gonum.org/v1/gonum/lapack/lapack.go new file mode 100644 index 0000000000..60ef1c244a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/lapack/lapack.go @@ -0,0 +1,240 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lapack + +import "gonum.org/v1/gonum/blas" + +// Complex128 defines the public complex128 LAPACK API supported by gonum/lapack. +type Complex128 interface{} + +// Float64 defines the public float64 LAPACK API supported by gonum/lapack. +type Float64 interface { + Dgecon(norm MatrixNorm, n int, a []float64, lda int, anorm float64, work []float64, iwork []int) float64 + Dgeev(jobvl LeftEVJob, jobvr RightEVJob, n int, a []float64, lda int, wr, wi []float64, vl []float64, ldvl int, vr []float64, ldvr int, work []float64, lwork int) (first int) + Dgels(trans blas.Transpose, m, n, nrhs int, a []float64, lda int, b []float64, ldb int, work []float64, lwork int) bool + Dgelqf(m, n int, a []float64, lda int, tau, work []float64, lwork int) + Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) + Dgeqrf(m, n int, a []float64, lda int, tau, work []float64, lwork int) + Dgesvd(jobU, jobVT SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) + Dgetrf(m, n int, a []float64, lda int, ipiv []int) (ok bool) + Dgetri(n int, a []float64, lda int, ipiv []int, work []float64, lwork int) (ok bool) + Dgetrs(trans blas.Transpose, n, nrhs int, a []float64, lda int, ipiv []int, b []float64, ldb int) + Dggsvd3(jobU, jobV, jobQ GSVDJob, m, n, p int, a []float64, lda int, b []float64, ldb int, alpha, beta, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, work []float64, lwork int, iwork []int) (k, l int, ok bool) + Dlantr(norm MatrixNorm, uplo blas.Uplo, diag blas.Diag, m, n int, a []float64, lda int, work []float64) float64 + Dlange(norm MatrixNorm, m, n int, a []float64, lda int, work []float64) float64 + Dlansy(norm MatrixNorm, uplo blas.Uplo, n int, a []float64, lda int, work []float64) float64 + Dlapmr(forward bool, m, n int, x []float64, ldx int, k []int) + Dlapmt(forward bool, m, n int, x []float64, ldx int, k []int) + Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) + Dormqr(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) + Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) + Dormlq(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) + Dpbcon(uplo blas.Uplo, n, kd int, ab []float64, ldab int, anorm float64, work []float64, iwork []int) float64 + Dpbtrf(uplo 
blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) + Dpbtrs(uplo blas.Uplo, n, kd, nrhs int, ab []float64, ldab int, b []float64, ldb int) + Dpocon(uplo blas.Uplo, n int, a []float64, lda int, anorm float64, work []float64, iwork []int) float64 + Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok bool) + Dpotri(ul blas.Uplo, n int, a []float64, lda int) (ok bool) + Dpotrs(ul blas.Uplo, n, nrhs int, a []float64, lda int, b []float64, ldb int) + Dpstrf(uplo blas.Uplo, n int, a []float64, lda int, piv []int, tol float64, work []float64) (rank int, ok bool) + Dsyev(jobz EVJob, uplo blas.Uplo, n int, a []float64, lda int, w, work []float64, lwork int) (ok bool) + Dtbtrs(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n, kd, nrhs int, a []float64, lda int, b []float64, ldb int) (ok bool) + Dtrcon(norm MatrixNorm, uplo blas.Uplo, diag blas.Diag, n int, a []float64, lda int, work []float64, iwork []int) float64 + Dtrtri(uplo blas.Uplo, diag blas.Diag, n int, a []float64, lda int) (ok bool) + Dtrtrs(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n, nrhs int, a []float64, lda int, b []float64, ldb int) (ok bool) +} + +// Direct specifies the direction of the multiplication for the Householder matrix. +type Direct byte + +const ( + Forward Direct = 'F' // Reflectors are right-multiplied, H_0 * H_1 * ... * H_{k-1}. + Backward Direct = 'B' // Reflectors are left-multiplied, H_{k-1} * ... * H_1 * H_0. +) + +// Sort is the sorting order. +type Sort byte + +const ( + SortIncreasing Sort = 'I' + SortDecreasing Sort = 'D' +) + +// StoreV indicates the storage direction of elementary reflectors. +type StoreV byte + +const ( + ColumnWise StoreV = 'C' // Reflector stored in a column of the matrix. + RowWise StoreV = 'R' // Reflector stored in a row of the matrix. +) + +// MatrixNorm represents the kind of matrix norm to compute. +type MatrixNorm byte + +const ( + MaxAbs MatrixNorm = 'M' // max(abs(A(i,j))) + MaxColumnSum MatrixNorm = 'O' // Maximum absolute column sum (one norm) + MaxRowSum MatrixNorm = 'I' // Maximum absolute row sum (infinity norm) + Frobenius MatrixNorm = 'F' // Frobenius norm (sqrt of sum of squares) +) + +// MatrixType represents the kind of matrix represented in the data. +type MatrixType byte + +const ( + General MatrixType = 'G' // A general dense matrix. + UpperTri MatrixType = 'U' // An upper triangular matrix. + LowerTri MatrixType = 'L' // A lower triangular matrix. +) + +// Pivot specifies the pivot type for plane rotations. +type Pivot byte + +const ( + Variable Pivot = 'V' + Top Pivot = 'T' + Bottom Pivot = 'B' +) + +// ApplyOrtho specifies which orthogonal matrix is applied in Dormbr. +type ApplyOrtho byte + +const ( + ApplyP ApplyOrtho = 'P' // Apply P or Pᵀ. + ApplyQ ApplyOrtho = 'Q' // Apply Q or Qᵀ. +) + +// GenOrtho specifies which orthogonal matrix is generated in Dorgbr. +type GenOrtho byte + +const ( + GeneratePT GenOrtho = 'P' // Generate Pᵀ. + GenerateQ GenOrtho = 'Q' // Generate Q. +) + +// SVDJob specifies the singular vector computation type for SVD. +type SVDJob byte + +const ( + SVDAll SVDJob = 'A' // Compute all columns of the orthogonal matrix U or V. + SVDStore SVDJob = 'S' // Compute the singular vectors and store them in the orthogonal matrix U or V. + SVDOverwrite SVDJob = 'O' // Compute the singular vectors and overwrite them on the input matrix A. + SVDNone SVDJob = 'N' // Do not compute singular vectors. +) + +// GSVDJob specifies the singular vector computation type for Generalized SVD. 
+type GSVDJob byte + +const ( + GSVDU GSVDJob = 'U' // Compute orthogonal matrix U. + GSVDV GSVDJob = 'V' // Compute orthogonal matrix V. + GSVDQ GSVDJob = 'Q' // Compute orthogonal matrix Q. + GSVDUnit GSVDJob = 'I' // Use unit-initialized matrix. + GSVDNone GSVDJob = 'N' // Do not compute orthogonal matrix. +) + +// EVComp specifies how eigenvectors are computed in Dsteqr. +type EVComp byte + +const ( + EVOrig EVComp = 'V' // Compute eigenvectors of the original symmetric matrix. + EVTridiag EVComp = 'I' // Compute eigenvectors of the tridiagonal matrix. + EVCompNone EVComp = 'N' // Do not compute eigenvectors. +) + +// EVJob specifies whether eigenvectors are computed in Dsyev. +type EVJob byte + +const ( + EVCompute EVJob = 'V' // Compute eigenvectors. + EVNone EVJob = 'N' // Do not compute eigenvectors. +) + +// LeftEVJob specifies whether left eigenvectors are computed in Dgeev. +type LeftEVJob byte + +const ( + LeftEVCompute LeftEVJob = 'V' // Compute left eigenvectors. + LeftEVNone LeftEVJob = 'N' // Do not compute left eigenvectors. +) + +// RightEVJob specifies whether right eigenvectors are computed in Dgeev. +type RightEVJob byte + +const ( + RightEVCompute RightEVJob = 'V' // Compute right eigenvectors. + RightEVNone RightEVJob = 'N' // Do not compute right eigenvectors. +) + +// BalanceJob specifies matrix balancing operation. +type BalanceJob byte + +const ( + Permute BalanceJob = 'P' + Scale BalanceJob = 'S' + PermuteScale BalanceJob = 'B' + BalanceNone BalanceJob = 'N' +) + +// SchurJob specifies whether the Schur form is computed in Dhseqr. +type SchurJob byte + +const ( + EigenvaluesOnly SchurJob = 'E' + EigenvaluesAndSchur SchurJob = 'S' +) + +// SchurComp specifies whether and how the Schur vectors are computed in Dhseqr. +type SchurComp byte + +const ( + SchurOrig SchurComp = 'V' // Compute Schur vectors of the original matrix. + SchurHess SchurComp = 'I' // Compute Schur vectors of the upper Hessenberg matrix. + SchurNone SchurComp = 'N' // Do not compute Schur vectors. +) + +// UpdateSchurComp specifies whether the matrix of Schur vectors is updated in Dtrexc. +type UpdateSchurComp byte + +const ( + UpdateSchur UpdateSchurComp = 'V' // Update the matrix of Schur vectors. + UpdateSchurNone UpdateSchurComp = 'N' // Do not update the matrix of Schur vectors. +) + +// EVSide specifies what eigenvectors are computed in Dtrevc3. +type EVSide byte + +const ( + EVRight EVSide = 'R' // Compute only right eigenvectors. + EVLeft EVSide = 'L' // Compute only left eigenvectors. + EVBoth EVSide = 'B' // Compute both right and left eigenvectors. +) + +// EVHowMany specifies which eigenvectors are computed in Dtrevc3 and how. +type EVHowMany byte + +const ( + EVAll EVHowMany = 'A' // Compute all right and/or left eigenvectors. + EVAllMulQ EVHowMany = 'B' // Compute all right and/or left eigenvectors multiplied by an input matrix. + EVSelected EVHowMany = 'S' // Compute selected right and/or left eigenvectors. +) + +// MaximizeNormXJob specifies the heuristic method for computing a contribution to +// the reciprocal Dif-estimate in Dlatdf. +type MaximizeNormXJob byte + +const ( + LocalLookAhead MaximizeNormXJob = 0 // Solve Z*x=h-f where h is a vector of ±1. + NormalizedNullVector MaximizeNormXJob = 2 // Compute an approximate null-vector e of Z, normalize e and solve Z*x=±e-f. +) + +// OrthoComp specifies whether and how the orthogonal matrix is computed in Dgghrd. +type OrthoComp byte + +const ( + OrthoNone OrthoComp = 'N' // Do not compute the orthogonal matrix. 
+	OrthoExplicit OrthoComp = 'I' // The orthogonal matrix is formed explicitly and returned in the argument.
+	OrthoPostmul  OrthoComp = 'V' // The orthogonal matrix is post-multiplied into the matrix stored in the argument on entry.
+)
diff --git a/vendor/gonum.org/v1/gonum/lapack/lapack64/doc.go b/vendor/gonum.org/v1/gonum/lapack/lapack64/doc.go
new file mode 100644
index 0000000000..da19e3ec78
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/lapack64/doc.go
@@ -0,0 +1,20 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package lapack64 provides a set of convenient wrapper functions for LAPACK
+// calls, as specified in the netlib standard (www.netlib.org).
+//
+// The native Go routines are used by default, and the Use function can be used
+// to set an alternative implementation.
+//
+// If the type of matrix (General, Symmetric, etc.) is known and fixed, it is
+// used in the wrapper signature. In many cases, however, the type of the matrix
+// changes during the call to the routine, for example the matrix is symmetric on
+// entry and is triangular on exit. In these cases the correct types should be checked
+// in the documentation.
+//
+// The full set of LAPACK functions is very large, and it is not clear that a
+// full implementation is desirable, let alone feasible. Please open up an issue
+// if there is a specific function you need and/or are willing to implement.
package lapack64 // import "gonum.org/v1/gonum/lapack/lapack64"
diff --git a/vendor/gonum.org/v1/gonum/lapack/lapack64/lapack64.go b/vendor/gonum.org/v1/gonum/lapack/lapack64/lapack64.go
new file mode 100644
index 0000000000..1b4c1734a1
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/lapack/lapack64/lapack64.go
@@ -0,0 +1,908 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lapack64
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+	"gonum.org/v1/gonum/lapack/gonum"
+)
+
+var lapack64 lapack.Float64 = gonum.Implementation{}
+
+// Use sets the LAPACK float64 implementation to be used by subsequent LAPACK calls.
+// The default implementation is gonum.Implementation.
+func Use(l lapack.Float64) {
+	lapack64 = l
+}
+
+// Tridiagonal represents a tridiagonal matrix using its three diagonals.
+type Tridiagonal struct {
+	N  int
+	DL []float64
+	D  []float64
+	DU []float64
+}
+
+// Potrf computes the Cholesky factorization of a.
+// The factorization has the form
+//
+//	A = Uᵀ * U  if a.Uplo == blas.Upper, or
+//	A = L * Lᵀ  if a.Uplo == blas.Lower,
+//
+// where U is an upper triangular matrix and L is lower triangular.
+// The triangular matrix is returned in t, and the underlying data between
+// a and t is shared. The returned bool indicates whether a is positive
+// definite and the factorization could be finished.
+func Potrf(a blas64.Symmetric) (t blas64.Triangular, ok bool) {
+	ok = lapack64.Dpotrf(a.Uplo, a.N, a.Data, max(1, a.Stride))
+	t.Uplo = a.Uplo
+	t.N = a.N
+	t.Data = a.Data
+	t.Stride = a.Stride
+	t.Diag = blas.NonUnit
+	return
+}
+
+// Potri computes the inverse of a real symmetric positive definite matrix A
+// using its Cholesky factorization.
+//
+// On entry, t contains the triangular factor U or L from the Cholesky
+// factorization A = Uᵀ*U or A = L*Lᵀ, as computed by Potrf.
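+//
+// A hedged sketch of the Potrf/Potri flow (illustrative only, not part of
+// the vendored sources; a is assumed to be a valid blas64.Symmetric):
+//
+//	t, ok := Potrf(a)
+//	if ok {
+//		inv, posdef := Potri(t) // inv shares Data with a and t.
+//		_, _ = inv, posdef
+//	}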
+// +// On return, the upper or lower triangle of the (symmetric) inverse of A is +// stored in t, overwriting the input factor U or L, and also returned in a. The +// underlying data between a and t is shared. +// +// The returned bool indicates whether the inverse was computed successfully. +func Potri(t blas64.Triangular) (a blas64.Symmetric, ok bool) { + ok = lapack64.Dpotri(t.Uplo, t.N, t.Data, max(1, t.Stride)) + a.Uplo = t.Uplo + a.N = t.N + a.Data = t.Data + a.Stride = t.Stride + return +} + +// Potrs solves a system of n linear equations A*X = B where A is an n×n +// symmetric positive definite matrix and B is an n×nrhs matrix, using the +// Cholesky factorization A = Uᵀ*U or A = L*Lᵀ. t contains the corresponding +// triangular factor as returned by Potrf. On entry, B contains the right-hand +// side matrix B, on return it contains the solution matrix X. +func Potrs(t blas64.Triangular, b blas64.General) { + lapack64.Dpotrs(t.Uplo, t.N, b.Cols, t.Data, max(1, t.Stride), b.Data, max(1, b.Stride)) +} + +// Pbcon returns an estimate of the reciprocal of the condition number (in the +// 1-norm) of an n×n symmetric positive definite band matrix using the Cholesky +// factorization +// +// A = Uᵀ*U if uplo == blas.Upper +// A = L*Lᵀ if uplo == blas.Lower +// +// computed by Pbtrf. The estimate is obtained for norm(inv(A)), and the +// reciprocal of the condition number is computed as +// +// rcond = 1 / (anorm * norm(inv(A))). +// +// The length of work must be at least 3*n and the length of iwork must be at +// least n. +func Pbcon(a blas64.SymmetricBand, anorm float64, work []float64, iwork []int) float64 { + return lapack64.Dpbcon(a.Uplo, a.N, a.K, a.Data, a.Stride, anorm, work, iwork) +} + +// Pbtrf computes the Cholesky factorization of an n×n symmetric positive +// definite band matrix +// +// A = Uᵀ * U if a.Uplo == blas.Upper +// A = L * Lᵀ if a.Uplo == blas.Lower +// +// where U and L are upper, respectively lower, triangular band matrices. +// +// The triangular matrix U or L is returned in t, and the underlying data +// between a and t is shared. The returned bool indicates whether A is positive +// definite and the factorization could be finished. +func Pbtrf(a blas64.SymmetricBand) (t blas64.TriangularBand, ok bool) { + ok = lapack64.Dpbtrf(a.Uplo, a.N, a.K, a.Data, max(1, a.Stride)) + t.Uplo = a.Uplo + t.Diag = blas.NonUnit + t.N = a.N + t.K = a.K + t.Data = a.Data + t.Stride = a.Stride + return t, ok +} + +// Pbtrs solves a system of linear equations A*X = B with an n×n symmetric +// positive definite band matrix A using the Cholesky factorization +// +// A = Uᵀ * U if t.Uplo == blas.Upper +// A = L * Lᵀ if t.Uplo == blas.Lower +// +// t contains the corresponding triangular factor as returned by Pbtrf. +// +// On entry, b contains the right hand side matrix B. On return, it is +// overwritten with the solution matrix X. +func Pbtrs(t blas64.TriangularBand, b blas64.General) { + lapack64.Dpbtrs(t.Uplo, t.N, t.K, b.Cols, t.Data, max(1, t.Stride), b.Data, max(1, b.Stride)) +} + +// Pstrf computes the Cholesky factorization with complete pivoting of an n×n +// symmetric positive semidefinite matrix A. +// +// The factorization has the form +// +// Pᵀ * A * P = Uᵀ * U , if a.Uplo = blas.Upper, +// Pᵀ * A * P = L * Lᵀ, if a.Uplo = blas.Lower, +// +// where U is an upper triangular matrix, L is lower triangular, and P is a +// permutation matrix. +// +// tol is a user-defined tolerance. The algorithm terminates if the pivot is +// less than or equal to tol. 
If tol is negative, then n*eps*max(A[k,k]) will be
+// used instead.
+//
+// The triangular factor U or L from the Cholesky factorization is returned in t
+// and the underlying data between a and t is shared. P is stored on return in
+// vector piv such that P[piv[k],k] = 1.
+//
+// Pstrf returns the computed rank of A and whether the factorization can be
+// used to solve a system. Pstrf does not attempt to check that A is positive
+// semi-definite, so if ok is false, the matrix A is either rank deficient or is
+// not positive semidefinite.
+//
+// The length of piv must be n and the length of work must be at least 2*n,
+// otherwise Pstrf will panic.
+func Pstrf(a blas64.Symmetric, piv []int, tol float64, work []float64) (t blas64.Triangular, rank int, ok bool) {
+	rank, ok = lapack64.Dpstrf(a.Uplo, a.N, a.Data, max(1, a.Stride), piv, tol, work)
+	t.Uplo = a.Uplo
+	t.Diag = blas.NonUnit
+	t.N = a.N
+	t.Data = a.Data
+	t.Stride = a.Stride
+	return t, rank, ok
+}
+
+// Gecon estimates the reciprocal of the condition number of the n×n matrix A
+// given the LU decomposition of the matrix. The condition number computed may
+// be based on the 1-norm or the ∞-norm.
+//
+// a contains the result of the LU decomposition of A as computed by Getrf.
+//
+// anorm is the corresponding 1-norm or ∞-norm of the original matrix A.
+//
+// work is a temporary data slice of length at least 4*n and Gecon will panic otherwise.
+//
+// iwork is a temporary data slice of length at least n and Gecon will panic otherwise.
+func Gecon(norm lapack.MatrixNorm, a blas64.General, anorm float64, work []float64, iwork []int) float64 {
+	return lapack64.Dgecon(norm, a.Cols, a.Data, max(1, a.Stride), anorm, work, iwork)
+}
+
+// Gels finds a minimum-norm solution based on the matrices A and B using the
+// QR or LQ factorization. Gels returns false if the matrix
+// A is singular, and true if this solution was successfully found.
+//
+// The minimization problem solved depends on the input parameters.
+//
+//  1. If m >= n and trans == blas.NoTrans, Gels finds X such that || A*X - B||_2
+//     is minimized.
+//  2. If m < n and trans == blas.NoTrans, Gels finds the minimum norm solution of
+//     A * X = B.
+//  3. If m >= n and trans == blas.Trans, Gels finds the minimum norm solution of
+//     Aᵀ * X = B.
+//  4. If m < n and trans == blas.Trans, Gels finds X such that || A*X - B||_2
+//     is minimized.
+//
+// Note that the least-squares solutions (cases 1 and 4) perform the minimization
+// per column of B. This is not the same as finding the minimum-norm matrix.
+//
+// The matrix A is a general matrix of size m×n and is modified during this call.
+// The input matrix B is of size max(m,n)×nrhs, and serves two purposes. On entry,
+// the elements of b specify the input matrix B. B has size m×nrhs if
+// trans == blas.NoTrans, and n×nrhs if trans == blas.Trans. On exit, the
+// leading submatrix of b contains the solution vectors X. If trans == blas.NoTrans,
+// this submatrix is of size n×nrhs, and of size m×nrhs otherwise.
+//
+// Work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= max(m,n) + max(m,n,nrhs), and this function will panic
+// otherwise. A longer work will enable blocked algorithms to be called.
+// In the special case that lwork == -1, work[0] will be set to the optimal working
+// length.
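+//
+// A hedged sketch of the lwork query described above (illustrative only,
+// not part of the vendored sources; a and b are assumed to be appropriately
+// sized blas64.General matrices):
+//
+//	var query [1]float64
+//	Gels(blas.NoTrans, a, b, query[:], -1) // workspace size query
+//	work := make([]float64, int(query[0]))
+//	ok := Gels(blas.NoTrans, a, b, work, len(work))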
+func Gels(trans blas.Transpose, a blas64.General, b blas64.General, work []float64, lwork int) bool { + return lapack64.Dgels(trans, a.Rows, a.Cols, b.Cols, a.Data, max(1, a.Stride), b.Data, max(1, b.Stride), work, lwork) +} + +// Geqp3 computes a QR factorization with column pivoting of the m×n matrix A: +// +// A*P = Q*R +// +// where P is a permutation matrix, Q is an orthogonal matrix and R is a +// min(m,n)×n upper trapezoidal matrix. +// +// On return, the upper triangle of A contains the matrix R. The elements below +// the diagonal together with tau represent the matrix Q as a product of +// elementary reflectors +// +// Q = H_0 * H_1 * ... * H_{k-1}, where k = min(m,n). +// +// Each H_i has the form +// +// H_i = I - tau * v * vᵀ +// +// where tau is a scalar and v is a vector with v[0:i] = 0 and v[i] = 1; +// v[i+1:m] is stored on exit in A[i+1:m,i], and tau in tau[i]. +// +// jpvt specifies a column pivot to be applied to A. On entry, if jpvt[j] is at +// least zero, the jth column of A is permuted to the front of A*P (a leading +// column), if jpvt[j] is -1 the jth column of A is a free column. If jpvt[j] < +// -1, Geqp3 will panic. On return, jpvt holds the permutation that was applied; +// the jth column of A*P was the jpvt[j] column of A. jpvt must have length n or +// Geqp3 will panic. +// +// tau holds the scalar factors of the elementary reflectors. It must have +// length min(m,n), otherwise Geqp3 will panic. +// +// work must have length at least max(1,lwork), and lwork must be at least +// 3*n+1, otherwise Geqp3 will panic. For optimal performance lwork must be at +// least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return, work[0] +// will contain the optimal value of lwork. +// +// If lwork == -1, instead of performing Geqp3, only the optimal value of lwork +// will be stored in work[0]. +func Geqp3(a blas64.General, jpvt []int, tau, work []float64, lwork int) { + lapack64.Dgeqp3(a.Rows, a.Cols, a.Data, max(1, a.Stride), jpvt, tau, work, lwork) +} + +// Geqrf computes the QR factorization of the m×n matrix A using a blocked +// algorithm. A is modified to contain the information to construct Q and R. +// The upper triangle of a contains the matrix R. The lower triangular elements +// (not including the diagonal) contain the elementary reflectors. tau is modified +// to contain the reflector scales. tau must have length min(m,n), and +// this function will panic otherwise. +// +// The ith elementary reflector can be explicitly constructed by first extracting +// the +// +// v[j] = 0 j < i +// v[j] = 1 j == i +// v[j] = a[j*lda+i] j > i +// +// and computing H_i = I - tau[i] * v * vᵀ. +// +// The orthonormal matrix Q can be constructed from a product of these elementary +// reflectors, Q = H_0 * H_1 * ... * H_{k-1}, where k = min(m,n). +// +// Work is temporary storage, and lwork specifies the usable memory length. +// At minimum, lwork >= m and this function will panic otherwise. +// Geqrf is a blocked QR factorization, but the block size is limited +// by the temporary space available. If lwork == -1, instead of performing Geqrf, +// the optimal work length will be stored into work[0]. +func Geqrf(a blas64.General, tau, work []float64, lwork int) { + lapack64.Dgeqrf(a.Rows, a.Cols, a.Data, max(1, a.Stride), tau, work, lwork) +} + +// Gelqf computes the LQ factorization of the m×n matrix A using a blocked +// algorithm. A is modified to contain the information to construct L and Q. The +// lower triangle of a contains the matrix L. 
The elements above the diagonal +// and the slice tau represent the matrix Q. tau is modified to contain the +// reflector scales. tau must have length at least min(m,n), and this function +// will panic otherwise. +// +// See Geqrf for a description of the elementary reflectors and orthonormal +// matrix Q. Q is constructed as a product of these elementary reflectors, +// Q = H_{k-1} * ... * H_1 * H_0. +// +// Work is temporary storage, and lwork specifies the usable memory length. +// At minimum, lwork >= m and this function will panic otherwise. +// Gelqf is a blocked LQ factorization, but the block size is limited +// by the temporary space available. If lwork == -1, instead of performing Gelqf, +// the optimal work length will be stored into work[0]. +func Gelqf(a blas64.General, tau, work []float64, lwork int) { + lapack64.Dgelqf(a.Rows, a.Cols, a.Data, max(1, a.Stride), tau, work, lwork) +} + +// Gesvd computes the singular value decomposition of the input matrix A. +// +// The singular value decomposition is +// +// A = U * Sigma * Vᵀ +// +// where Sigma is an m×n diagonal matrix containing the singular values of A, +// U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first +// min(m,n) columns of U and V are the left and right singular vectors of A +// respectively. +// +// jobU and jobVT are options for computing the singular vectors. The behavior +// is as follows +// +// jobU == lapack.SVDAll All m columns of U are returned in u +// jobU == lapack.SVDStore The first min(m,n) columns are returned in u +// jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a +// jobU == lapack.SVDNone The columns of U are not computed. +// +// The behavior is the same for jobVT and the rows of Vᵀ. At most one of jobU +// and jobVT can equal lapack.SVDOverwrite, and Gesvd will panic otherwise. +// +// On entry, a contains the data for the m×n matrix A. During the call to Gesvd +// the data is overwritten. On exit, A contains the appropriate singular vectors +// if either job is lapack.SVDOverwrite. +// +// s is a slice of length at least min(m,n) and on exit contains the singular +// values in decreasing order. +// +// u contains the left singular vectors on exit, stored columnwise. If +// jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDStore u is +// of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is +// not used. +// +// vt contains the left singular vectors on exit, stored rowwise. If +// jobV == lapack.SVDAll, vt is of size n×m. If jobVT == lapack.SVDStore vt is +// of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is +// not used. +// +// work is a slice for storing temporary memory, and lwork is the usable size of +// the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)). +// If lwork == -1, instead of performing Gesvd, the optimal work length will be +// stored into work[0]. Gesvd will panic if the working memory has insufficient +// storage. +// +// Gesvd returns whether the decomposition successfully completed. +func Gesvd(jobU, jobVT lapack.SVDJob, a, u, vt blas64.General, s, work []float64, lwork int) (ok bool) { + return lapack64.Dgesvd(jobU, jobVT, a.Rows, a.Cols, a.Data, max(1, a.Stride), s, u.Data, max(1, u.Stride), vt.Data, max(1, vt.Stride), work, lwork) +} + +// Getrf computes the LU decomposition of an m×n matrix A using partial +// pivoting with row interchanges. 
+// +// The LU decomposition is a factorization of A into +// +// A = P * L * U +// +// where P is a permutation matrix, L is a lower triangular with unit diagonal +// elements (lower trapezoidal if m > n), and U is upper triangular (upper +// trapezoidal if m < n). +// +// On entry, a contains the matrix A. On return, L and U are stored in place +// into a, and P is represented by ipiv. +// +// ipiv contains a sequence of row swaps. It indicates that row i of the matrix +// was interchanged with ipiv[i]. ipiv must have length min(m,n), and Getrf will +// panic otherwise. ipiv is zero-indexed. +// +// Getrf returns whether the matrix A is nonsingular. The LU decomposition will +// be computed regardless of the singularity of A, but the result should not be +// used to solve a system of equation. +func Getrf(a blas64.General, ipiv []int) bool { + return lapack64.Dgetrf(a.Rows, a.Cols, a.Data, max(1, a.Stride), ipiv) +} + +// Getri computes the inverse of the matrix A using the LU factorization computed +// by Getrf. On entry, a contains the PLU decomposition of A as computed by +// Getrf and on exit contains the reciprocal of the original matrix. +// +// Getri will not perform the inversion if the matrix is singular, and returns +// a boolean indicating whether the inversion was successful. +// +// Work is temporary storage, and lwork specifies the usable memory length. +// At minimum, lwork >= n and this function will panic otherwise. +// Getri is a blocked inversion, but the block size is limited +// by the temporary space available. If lwork == -1, instead of performing Getri, +// the optimal work length will be stored into work[0]. +func Getri(a blas64.General, ipiv []int, work []float64, lwork int) (ok bool) { + return lapack64.Dgetri(a.Cols, a.Data, max(1, a.Stride), ipiv, work, lwork) +} + +// Getrs solves a system of equations using an LU factorization. +// The system of equations solved is +// +// A * X = B if trans == blas.Trans +// Aᵀ * X = B if trans == blas.NoTrans +// +// A is a general n×n matrix with stride lda. B is a general matrix of size n×nrhs. +// +// On entry b contains the elements of the matrix B. On exit, b contains the +// elements of X, the solution to the system of equations. +// +// a and ipiv contain the LU factorization of A and the permutation indices as +// computed by Getrf. ipiv is zero-indexed. +func Getrs(trans blas.Transpose, a blas64.General, b blas64.General, ipiv []int) { + lapack64.Dgetrs(trans, a.Cols, b.Cols, a.Data, max(1, a.Stride), ipiv, b.Data, max(1, b.Stride)) +} + +// Ggsvd3 computes the generalized singular value decomposition (GSVD) +// of an m×n matrix A and p×n matrix B: +// +// Uᵀ*A*Q = D1*[ 0 R ] +// +// Vᵀ*B*Q = D2*[ 0 R ] +// +// where U, V and Q are orthogonal matrices. +// +// Ggsvd3 returns k and l, the dimensions of the sub-blocks. k+l +// is the effective numerical rank of the (m+p)×n matrix [ Aᵀ Bᵀ ]ᵀ. +// R is a (k+l)×(k+l) nonsingular upper triangular matrix, D1 and +// D2 are m×(k+l) and p×(k+l) diagonal matrices and of the following +// structures, respectively: +// +// If m-k-l >= 0, +// +// k l +// D1 = k [ I 0 ] +// l [ 0 C ] +// m-k-l [ 0 0 ] +// +// k l +// D2 = l [ 0 S ] +// p-l [ 0 0 ] +// +// n-k-l k l +// [ 0 R ] = k [ 0 R11 R12 ] k +// l [ 0 0 R22 ] l +// +// where +// +// C = diag( alpha_k, ... , alpha_{k+l} ), +// S = diag( beta_k, ... , beta_{k+l} ), +// C^2 + S^2 = I. +// +// R is stored in +// +// A[0:k+l, n-k-l:n] +// +// on exit. 
+//
+// If m-k-l < 0,
+//
+// k m-k k+l-m
+// D1 = k [ I 0 0 ]
+// m-k [ 0 C 0 ]
+//
+// k m-k k+l-m
+// D2 = m-k [ 0 S 0 ]
+// k+l-m [ 0 0 I ]
+// p-l [ 0 0 0 ]
+//
+// n-k-l k m-k k+l-m
+// [ 0 R ] = k [ 0 R11 R12 R13 ]
+// m-k [ 0 0 R22 R23 ]
+// k+l-m [ 0 0 0 R33 ]
+//
+// where
+//
+// C = diag( alpha_k, ... , alpha_m ),
+// S = diag( beta_k, ... , beta_m ),
+// C^2 + S^2 = I.
+//
+// R = [ R11 R12 R13 ] is stored in A[0:m, n-k-l:n]
+// [ 0 R22 R23 ]
+//
+// and R33 is stored in
+//
+// B[m-k:l, n+m-k-l:n] on exit.
+//
+// Ggsvd3 computes C, S, R, and optionally the orthogonal transformation
+// matrices U, V and Q.
+//
+// jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior
+// is as follows
+//
+// jobU == lapack.GSVDU Compute orthogonal matrix U
+// jobU == lapack.GSVDNone Do not compute orthogonal matrix.
+//
+// The behavior is the same for jobV and jobQ with the exception that instead of
+// lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively.
+// The matrices U, V and Q must be m×m, p×p and n×n respectively unless the
+// relevant job parameter is lapack.GSVDNone.
+//
+// alpha and beta must have length n or Ggsvd3 will panic. On exit, alpha and
+// beta contain the generalized singular value pairs of A and B
+//
+// alpha[0:k] = 1,
+// beta[0:k] = 0,
+//
+// if m-k-l >= 0,
+//
+// alpha[k:k+l] = diag(C),
+// beta[k:k+l] = diag(S),
+//
+// if m-k-l < 0,
+//
+// alpha[k:m] = C, alpha[m:k+l] = 0
+// beta[k:m] = S, beta[m:k+l] = 1.
+//
+// if k+l < n,
+//
+// alpha[k+l:n] = 0 and
+// beta[k+l:n] = 0.
+//
+// On exit, iwork contains the permutation required to sort alpha descending.
+//
+// iwork must have length n, work must have length at least max(1, lwork), and
+// lwork must be -1 or greater than n, otherwise Ggsvd3 will panic. If
+// lwork is -1, work[0] holds the optimal lwork on return, but Ggsvd3 does
+// not perform the GSVD.
+func Ggsvd3(jobU, jobV, jobQ lapack.GSVDJob, a, b blas64.General, alpha, beta []float64, u, v, q blas64.General, work []float64, lwork int, iwork []int) (k, l int, ok bool) {
+ return lapack64.Dggsvd3(jobU, jobV, jobQ, a.Rows, a.Cols, b.Rows, a.Data, max(1, a.Stride), b.Data, max(1, b.Stride), alpha, beta, u.Data, max(1, u.Stride), v.Data, max(1, v.Stride), q.Data, max(1, q.Stride), work, lwork, iwork)
+}
+
+// Gtsv solves one of the equations
+//
+// A * X = B if trans == blas.NoTrans
+// Aᵀ * X = B if trans == blas.Trans or blas.ConjTrans
+//
+// where A is an n×n tridiagonal matrix. It uses Gaussian elimination with
+// partial pivoting.
+//
+// On entry, a contains the matrix A, on return it will be overwritten.
+//
+// On entry, b contains the n×nrhs right-hand side matrix B. On return, it will
+// be overwritten. If ok is true, it will be overwritten by the solution matrix X.
+//
+// Gtsv returns whether the solution X has been successfully computed.
+//
+// Dgtsv is not part of the lapack.Float64 interface and so calls to Gtsv are
+// always executed by the Gonum implementation.
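+//
+// An editorial sketch of typical usage (not part of the vendored gonum
+// source; the literal values are assumed for illustration only): solving a
+// 3×3 tridiagonal system A * X = B in place.
+//
+// a := Tridiagonal{N: 3, DL: []float64{1, 1}, D: []float64{4, 4, 4}, DU: []float64{2, 2}}
+// b := blas64.General{Rows: 3, Cols: 1, Stride: 1, Data: []float64{1, 2, 3}}
+// ok := Gtsv(blas.NoTrans, a, b) // on success b.Data holds the solution X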
+func Gtsv(trans blas.Transpose, a Tridiagonal, b blas64.General) (ok bool) { + if trans != blas.NoTrans { + a.DL, a.DU = a.DU, a.DL + } + return gonum.Implementation{}.Dgtsv(a.N, b.Cols, a.DL, a.D, a.DU, b.Data, max(1, b.Stride)) +} + +// Lagtm performs one of the matrix-matrix operations +// +// C = alpha * A * B + beta * C if trans == blas.NoTrans +// C = alpha * Aᵀ * B + beta * C if trans == blas.Trans or blas.ConjTrans +// +// where A is an m×m tridiagonal matrix represented by its diagonals dl, d, du, +// B and C are m×n dense matrices, and alpha and beta are scalars. +// +// Dlagtm is not part of the lapack.Float64 interface and so calls to Lagtm are +// always executed by the Gonum implementation. +func Lagtm(trans blas.Transpose, alpha float64, a Tridiagonal, b blas64.General, beta float64, c blas64.General) { + gonum.Implementation{}.Dlagtm(trans, c.Rows, c.Cols, alpha, a.DL, a.D, a.DU, b.Data, max(1, b.Stride), beta, c.Data, max(1, c.Stride)) +} + +// Lange computes the matrix norm of the general m×n matrix A. The input norm +// specifies the norm computed. +// +// lapack.MaxAbs: the maximum absolute value of an element. +// lapack.MaxColumnSum: the maximum column sum of the absolute values of the entries. +// lapack.MaxRowSum: the maximum row sum of the absolute values of the entries. +// lapack.Frobenius: the square root of the sum of the squares of the entries. +// +// If norm == lapack.MaxColumnSum, work must be of length n, and this function will panic otherwise. +// There are no restrictions on work for the other matrix norms. +func Lange(norm lapack.MatrixNorm, a blas64.General, work []float64) float64 { + return lapack64.Dlange(norm, a.Rows, a.Cols, a.Data, max(1, a.Stride), work) +} + +// Langb returns the given norm of a general m×n band matrix with kl sub-diagonals and +// ku super-diagonals. +// +// Dlangb is not part of the lapack.Float64 interface and so calls to Langb are always +// executed by the Gonum implementation. +func Langb(norm lapack.MatrixNorm, a blas64.Band) float64 { + return gonum.Implementation{}.Dlangb(norm, a.Rows, a.Cols, a.KL, a.KU, a.Data, max(1, a.Stride)) +} + +// Langt computes the specified norm of an n×n tridiagonal matrix. +// +// Dlangt is not part of the lapack.Float64 interface and so calls to Langt are +// always executed by the Gonum implementation. +func Langt(norm lapack.MatrixNorm, a Tridiagonal) float64 { + return gonum.Implementation{}.Dlangt(norm, a.N, a.DL, a.D, a.DU) +} + +// Lansb computes the specified norm of an n×n symmetric band matrix. If +// norm == lapack.MaxColumnSum or norm == lapack.MaxRowSum, work must have length +// at least n and this function will panic otherwise. +// There are no restrictions on work for the other matrix norms. +// +// Dlansb is not part of the lapack.Float64 interface and so calls to Lansb are always +// executed by the Gonum implementation. +func Lansb(norm lapack.MatrixNorm, a blas64.SymmetricBand, work []float64) float64 { + return gonum.Implementation{}.Dlansb(norm, a.Uplo, a.N, a.K, a.Data, max(1, a.Stride), work) +} + +// Lansy computes the specified norm of an n×n symmetric matrix. If +// norm == lapack.MaxColumnSum or norm == lapack.MaxRowSum, work must have length +// at least n and this function will panic otherwise. +// There are no restrictions on work for the other matrix norms. 
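+//
+// An editorial sketch (not part of the vendored gonum source; values assumed
+// for illustration): the max-row-sum norm of the symmetric matrix
+// [[2 1], [1 3]], stored in its upper triangle.
+//
+// a := blas64.Symmetric{Uplo: blas.Upper, N: 2, Stride: 2, Data: []float64{2, 1, 0, 3}}
+// work := make([]float64, a.N)
+// rowSum := Lansy(lapack.MaxRowSum, a, work) // rowSum == 4 (second row: |1|+|3|)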
+func Lansy(norm lapack.MatrixNorm, a blas64.Symmetric, work []float64) float64 {
+ return lapack64.Dlansy(norm, a.Uplo, a.N, a.Data, max(1, a.Stride), work)
+}
+
+// Lantr computes the specified norm of an m×n trapezoidal matrix A. If
+// norm == lapack.MaxColumnSum work must have length at least n and this function
+// will panic otherwise. There are no restrictions on work for the other matrix norms.
+func Lantr(norm lapack.MatrixNorm, a blas64.Triangular, work []float64) float64 {
+ return lapack64.Dlantr(norm, a.Uplo, a.Diag, a.N, a.N, a.Data, max(1, a.Stride), work)
+}
+
+// Lantb computes the specified norm of an n×n triangular band matrix A. If
+// norm == lapack.MaxColumnSum work must have length at least n and this function
+// will panic otherwise. There are no restrictions on work for the other matrix
+// norms.
+func Lantb(norm lapack.MatrixNorm, a blas64.TriangularBand, work []float64) float64 {
+ return gonum.Implementation{}.Dlantb(norm, a.Uplo, a.Diag, a.N, a.K, a.Data, max(1, a.Stride), work)
+}
+
+// Lapmr rearranges the rows of the m×n matrix X as specified by the permutation
+// k[0],k[1],...,k[m-1] of the integers 0,...,m-1.
+//
+// If forward is true, a forward permutation is applied:
+//
+// X[k[i],0:n] is moved to X[i,0:n] for i=0,1,...,m-1.
+//
+// If forward is false, a backward permutation is applied:
+//
+// X[i,0:n] is moved to X[k[i],0:n] for i=0,1,...,m-1.
+//
+// k must have length m, otherwise Lapmr will panic. k is zero-indexed.
+func Lapmr(forward bool, x blas64.General, k []int) {
+ lapack64.Dlapmr(forward, x.Rows, x.Cols, x.Data, max(1, x.Stride), k)
+}
+
+// Lapmt rearranges the columns of the m×n matrix X as specified by the
+// permutation k[0],k[1],...,k[n-1] of the integers 0,...,n-1.
+//
+// If forward is true, a forward permutation is applied:
+//
+// X[0:m,k[j]] is moved to X[0:m,j] for j=0,1,...,n-1.
+//
+// If forward is false, a backward permutation is applied:
+//
+// X[0:m,j] is moved to X[0:m,k[j]] for j=0,1,...,n-1.
+//
+// k must have length n, otherwise Lapmt will panic. k is zero-indexed.
+func Lapmt(forward bool, x blas64.General, k []int) {
+ lapack64.Dlapmt(forward, x.Rows, x.Cols, x.Data, max(1, x.Stride), k)
+}
+
+// Orglq generates an m×n matrix Q with orthonormal rows defined as the first m
+// rows of a product of k elementary reflectors of order n
+//
+// Q = H_{k-1} * ... * H_0
+//
+// as returned by Dgelqf.
+//
+// k is determined by the length of tau.
+//
+// On entry, tau and the first k rows of A must contain the scalar factors and
+// the vectors, respectively, which define the elementary reflectors H_i,
+// i=0,...,k-1, as returned by Dgelqf. On return, A contains the matrix Q.
+//
+// work must have length at least lwork and lwork must be at least max(1,m). On
+// return, optimal value of lwork will be stored in work[0]. It must also hold
+// that 0 <= k <= m <= n, otherwise Orglq will panic.
+//
+// If lwork == -1, instead of performing Orglq, the function only calculates the
+// optimal value of lwork and stores it into work[0].
+func Orglq(a blas64.General, tau, work []float64, lwork int) {
+ lapack64.Dorglq(a.Rows, a.Cols, len(tau), a.Data, a.Stride, tau, work, lwork)
+}
+
+// Ormlq multiplies the matrix C by the orthogonal matrix Q defined by
+// A and tau. A and tau are as returned from Gelqf.
+//
+// C = Q * C if side == blas.Left and trans == blas.NoTrans
+// C = Qᵀ * C if side == blas.Left and trans == blas.Trans
+// C = C * Q if side == blas.Right and trans == blas.NoTrans
+// C = C * Qᵀ if side == blas.Right and trans == blas.Trans
+//
+// If side == blas.Left, A is a matrix of size k×m, and if side == blas.Right
+// A is of size k×n. This uses a blocked algorithm.
+//
+// Work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= n if side == blas.Left and lwork >= m if side == blas.Right,
+// and this function will panic otherwise.
+// Ormlq uses a block algorithm, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Ormlq,
+// the optimal work length will be stored into work[0].
+//
+// Tau contains the Householder scales and must have length at least k, and
+// this function will panic otherwise.
+func Ormlq(side blas.Side, trans blas.Transpose, a blas64.General, tau []float64, c blas64.General, work []float64, lwork int) {
+ lapack64.Dormlq(side, trans, c.Rows, c.Cols, a.Rows, a.Data, max(1, a.Stride), tau, c.Data, max(1, c.Stride), work, lwork)
+}
+
+// Orgqr generates an m×n matrix Q with orthonormal columns defined by the
+// product of elementary reflectors
+//
+// Q = H_0 * H_1 * ... * H_{k-1}
+//
+// as computed by Geqrf.
+//
+// k is determined by the length of tau.
+//
+// The length of work must be at least n and it also must be that 0 <= k <= n
+// and 0 <= n <= m.
+//
+// work is temporary storage, and lwork specifies the usable memory length. At
+// minimum, lwork >= n, and the amount of blocking is limited by the usable
+// length. If lwork == -1, instead of computing Orgqr the optimal work length
+// is stored into work[0].
+//
+// Orgqr will panic if the conditions on input values are not met.
+func Orgqr(a blas64.General, tau []float64, work []float64, lwork int) {
+ lapack64.Dorgqr(a.Rows, a.Cols, len(tau), a.Data, a.Stride, tau, work, lwork)
+}
+
+// Ormqr multiplies an m×n matrix C by an orthogonal matrix Q as
+//
+// C = Q * C if side == blas.Left and trans == blas.NoTrans,
+// C = Qᵀ * C if side == blas.Left and trans == blas.Trans,
+// C = C * Q if side == blas.Right and trans == blas.NoTrans,
+// C = C * Qᵀ if side == blas.Right and trans == blas.Trans,
+//
+// where Q is defined as the product of k elementary reflectors
+//
+// Q = H_0 * H_1 * ... * H_{k-1}.
+//
+// k is determined by the length of tau.
+//
+// If side == blas.Left, A is an m×k matrix and 0 <= k <= m.
+// If side == blas.Right, A is an n×k matrix and 0 <= k <= n.
+// The ith column of A contains the vector which defines the elementary
+// reflector H_i and tau[i] contains its scalar factor. Geqrf returns A and tau
+// in the required form.
+//
+// work must have length at least max(1,lwork), and lwork must be at least n if
+// side == blas.Left and at least m if side == blas.Right, otherwise Ormqr will
+// panic.
+//
+// work is temporary storage, and lwork specifies the usable memory length. At
+// minimum, lwork >= n if side == blas.Left and lwork >= m if side ==
+// blas.Right, and this function will panic otherwise. Larger values of lwork
+// will generally give better performance. On return, work[0] will contain the
+// optimal value of lwork.
+//
+// If lwork is -1, instead of performing Ormqr, the optimal workspace size will
+// be stored into work[0].
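+//
+// An editorial sketch (not part of the vendored gonum source): applying Qᵀ
+// from a QR factorization to a right-hand side, the usual first step of a
+// least-squares solve. a and b are assumed to be m×n (m >= n) and m×nrhs
+// blas64.General values; a production caller would also query Ormqr's own
+// optimal lwork with lwork == -1 rather than reusing the Geqrf workspace.
+//
+// tau := make([]float64, a.Cols)
+// work := make([]float64, 1)
+// Geqrf(a, tau, work, -1) // workspace query
+// work = make([]float64, int(work[0]))
+// Geqrf(a, tau, work, len(work)) // A = Q * R
+// Ormqr(blas.Left, blas.Trans, a, tau, b, work, len(work)) // B <- Qᵀ * B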
+func Ormqr(side blas.Side, trans blas.Transpose, a blas64.General, tau []float64, c blas64.General, work []float64, lwork int) { + lapack64.Dormqr(side, trans, c.Rows, c.Cols, len(tau), a.Data, max(1, a.Stride), tau, c.Data, max(1, c.Stride), work, lwork) +} + +// Pocon estimates the reciprocal of the condition number of a positive-definite +// matrix A given the Cholesky decomposition of A. The condition number computed +// is based on the 1-norm and the ∞-norm. +// +// anorm is the 1-norm and the ∞-norm of the original matrix A. +// +// work is a temporary data slice of length at least 3*n and Pocon will panic otherwise. +// +// iwork is a temporary data slice of length at least n and Pocon will panic otherwise. +func Pocon(a blas64.Symmetric, anorm float64, work []float64, iwork []int) float64 { + return lapack64.Dpocon(a.Uplo, a.N, a.Data, max(1, a.Stride), anorm, work, iwork) +} + +// Syev computes all eigenvalues and, optionally, the eigenvectors of a real +// symmetric matrix A. +// +// w contains the eigenvalues in ascending order upon return. w must have length +// at least n, and Syev will panic otherwise. +// +// On entry, a contains the elements of the symmetric matrix A in the triangular +// portion specified by uplo. If jobz == lapack.EVCompute, a contains the +// orthonormal eigenvectors of A on exit, otherwise jobz must be lapack.EVNone +// and on exit the specified triangular region is overwritten. +// +// Work is temporary storage, and lwork specifies the usable memory length. At minimum, +// lwork >= 3*n-1, and Syev will panic otherwise. The amount of blocking is +// limited by the usable length. If lwork == -1, instead of computing Syev the +// optimal work length is stored into work[0]. +func Syev(jobz lapack.EVJob, a blas64.Symmetric, w, work []float64, lwork int) (ok bool) { + return lapack64.Dsyev(jobz, a.Uplo, a.N, a.Data, max(1, a.Stride), w, work, lwork) +} + +// Tbtrs solves a triangular system of the form +// +// A * X = B if trans == blas.NoTrans +// Aᵀ * X = B if trans == blas.Trans or blas.ConjTrans +// +// where A is an n×n triangular band matrix, and B is an n×nrhs matrix. +// +// Tbtrs returns whether A is non-singular. If A is singular, no solutions X +// are computed. +func Tbtrs(trans blas.Transpose, a blas64.TriangularBand, b blas64.General) (ok bool) { + return lapack64.Dtbtrs(a.Uplo, trans, a.Diag, a.N, a.K, b.Cols, a.Data, max(1, a.Stride), b.Data, max(1, b.Stride)) +} + +// Trcon estimates the reciprocal of the condition number of a triangular matrix A. +// The condition number computed may be based on the 1-norm or the ∞-norm. +// +// work is a temporary data slice of length at least 3*n and Trcon will panic otherwise. +// +// iwork is a temporary data slice of length at least n and Trcon will panic otherwise. +func Trcon(norm lapack.MatrixNorm, a blas64.Triangular, work []float64, iwork []int) float64 { + return lapack64.Dtrcon(norm, a.Uplo, a.Diag, a.N, a.Data, max(1, a.Stride), work, iwork) +} + +// Trtri computes the inverse of a triangular matrix, storing the result in place +// into a. +// +// Trtri will not perform the inversion if the matrix is singular, and returns +// a boolean indicating whether the inversion was successful. +func Trtri(a blas64.Triangular) (ok bool) { + return lapack64.Dtrtri(a.Uplo, a.Diag, a.N, a.Data, max(1, a.Stride)) +} + +// Trtrs solves a triangular system of the form A * X = B or Aᵀ * X = B. Trtrs +// returns whether the solve completed successfully. If A is singular, no solve is performed. 
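+//
+// An editorial sketch (not part of the vendored gonum source; values assumed
+// for illustration): solving the upper triangular system
+// [[2 1], [0 3]] * X = [4 6]ᵀ.
+//
+// a := blas64.Triangular{Uplo: blas.Upper, Diag: blas.NonUnit, N: 2, Stride: 2, Data: []float64{2, 1, 0, 3}}
+// b := blas64.General{Rows: 2, Cols: 1, Stride: 1, Data: []float64{4, 6}}
+// ok := Trtrs(blas.NoTrans, a, b) // on success b.Data holds X = [1 2]ᵀ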
+func Trtrs(trans blas.Transpose, a blas64.Triangular, b blas64.General) (ok bool) { + return lapack64.Dtrtrs(a.Uplo, trans, a.Diag, a.N, b.Cols, a.Data, max(1, a.Stride), b.Data, max(1, b.Stride)) +} + +// Geev computes the eigenvalues and, optionally, the left and/or right +// eigenvectors for an n×n real nonsymmetric matrix A. +// +// The right eigenvector v_j of A corresponding to an eigenvalue λ_j +// is defined by +// +// A v_j = λ_j v_j, +// +// and the left eigenvector u_j corresponding to an eigenvalue λ_j is defined by +// +// u_jᴴ A = λ_j u_jᴴ, +// +// where u_jᴴ is the conjugate transpose of u_j. +// +// On return, A will be overwritten and the left and right eigenvectors will be +// stored, respectively, in the columns of the n×n matrices VL and VR in the +// same order as their eigenvalues. If the j-th eigenvalue is real, then +// +// u_j = VL[:,j], +// v_j = VR[:,j], +// +// and if it is not real, then j and j+1 form a complex conjugate pair and the +// eigenvectors can be recovered as +// +// u_j = VL[:,j] + i*VL[:,j+1], +// u_{j+1} = VL[:,j] - i*VL[:,j+1], +// v_j = VR[:,j] + i*VR[:,j+1], +// v_{j+1} = VR[:,j] - i*VR[:,j+1], +// +// where i is the imaginary unit. The computed eigenvectors are normalized to +// have Euclidean norm equal to 1 and largest component real. +// +// Left eigenvectors will be computed only if jobvl == lapack.LeftEVCompute, +// otherwise jobvl must be lapack.LeftEVNone. +// Right eigenvectors will be computed only if jobvr == lapack.RightEVCompute, +// otherwise jobvr must be lapack.RightEVNone. +// For other values of jobvl and jobvr Geev will panic. +// +// On return, wr and wi will contain the real and imaginary parts, respectively, +// of the computed eigenvalues. Complex conjugate pairs of eigenvalues appear +// consecutively with the eigenvalue having the positive imaginary part first. +// wr and wi must have length n, and Geev will panic otherwise. +// +// work must have length at least lwork and lwork must be at least max(1,4*n) if +// the left or right eigenvectors are computed, and at least max(1,3*n) if no +// eigenvectors are computed. For good performance, lwork must generally be +// larger. On return, optimal value of lwork will be stored in work[0]. +// +// If lwork == -1, instead of performing Geev, the function only calculates the +// optimal value of lwork and stores it into work[0]. +// +// On return, first will be the index of the first valid eigenvalue. +// If first == 0, all eigenvalues and eigenvectors have been computed. +// If first is positive, Geev failed to compute all the eigenvalues, no +// eigenvectors have been computed and wr[first:] and wi[first:] contain those +// eigenvalues which have converged. 
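+//
+// An editorial sketch (not part of the vendored gonum source): computing the
+// eigenvalues of a square blas64.General matrix a without eigenvectors, using
+// the lwork == -1 query convention described above.
+//
+// n := a.Rows
+// wr, wi := make([]float64, n), make([]float64, n)
+// work := make([]float64, 1)
+// Geev(lapack.LeftEVNone, lapack.RightEVNone, a, wr, wi, blas64.General{}, blas64.General{}, work, -1)
+// work = make([]float64, int(work[0]))
+// first := Geev(lapack.LeftEVNone, lapack.RightEVNone, a, wr, wi, blas64.General{}, blas64.General{}, work, len(work))
+// // first == 0 means all eigenvalues converged; they are in wr and wi.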
+func Geev(jobvl lapack.LeftEVJob, jobvr lapack.RightEVJob, a blas64.General, wr, wi []float64, vl, vr blas64.General, work []float64, lwork int) (first int) { + n := a.Rows + if a.Cols != n { + panic("lapack64: matrix not square") + } + if jobvl == lapack.LeftEVCompute && (vl.Rows != n || vl.Cols != n) { + panic("lapack64: bad size of VL") + } + if jobvr == lapack.RightEVCompute && (vr.Rows != n || vr.Cols != n) { + panic("lapack64: bad size of VR") + } + return lapack64.Dgeev(jobvl, jobvr, n, a.Data, max(1, a.Stride), wr, wi, vl.Data, max(1, vl.Stride), vr.Data, max(1, vr.Stride), work, lwork) +} diff --git a/vendor/gonum.org/v1/gonum/mat/README.md b/vendor/gonum.org/v1/gonum/mat/README.md new file mode 100644 index 0000000000..5e7be6b234 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/README.md @@ -0,0 +1,6 @@ +# Gonum matrix + +[![go.dev reference](https://pkg.go.dev/badge/gonum.org/v1/gonum/mat)](https://pkg.go.dev/gonum.org/v1/gonum/mat) +[![GoDoc](https://godocs.io/gonum.org/v1/gonum/mat?status.svg)](https://godocs.io/gonum.org/v1/gonum/mat) + +Package mat is a matrix package for the Go language. diff --git a/vendor/gonum.org/v1/gonum/mat/band.go b/vendor/gonum.org/v1/gonum/mat/band.go new file mode 100644 index 0000000000..7660cdaa8e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/band.go @@ -0,0 +1,368 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" + "gonum.org/v1/gonum/lapack/lapack64" +) + +var ( + bandDense *BandDense + _ Matrix = bandDense + _ allMatrix = bandDense + _ denseMatrix = bandDense + _ Banded = bandDense + _ RawBander = bandDense + + _ NonZeroDoer = bandDense + _ RowNonZeroDoer = bandDense + _ ColNonZeroDoer = bandDense +) + +// BandDense represents a band matrix in dense storage format. +type BandDense struct { + mat blas64.Band +} + +// Banded is a band matrix representation. +type Banded interface { + Matrix + // Bandwidth returns the lower and upper bandwidth values for + // the matrix. The total bandwidth of the matrix is kl+ku+1. + Bandwidth() (kl, ku int) + + // TBand is the equivalent of the T() method in the Matrix + // interface but guarantees the transpose is of banded type. + TBand() Banded +} + +// A RawBander can return a blas64.Band representation of the receiver. +// Changes to the blas64.Band.Data slice will be reflected in the original +// matrix, changes to the Rows, Cols, KL, KU and Stride fields will not. +type RawBander interface { + RawBand() blas64.Band +} + +// A MutableBanded can set elements of a band matrix. +type MutableBanded interface { + Banded + + // SetBand sets the element at row i, column j to the value v. + // It panics if the location is outside the appropriate region of the matrix. + SetBand(i, j int, v float64) +} + +var ( + _ Matrix = TransposeBand{} + _ Banded = TransposeBand{} + _ UntransposeBander = TransposeBand{} +) + +// TransposeBand is a type for performing an implicit transpose of a band +// matrix. It implements the Banded interface, returning values from the +// transpose of the matrix within. +type TransposeBand struct { + Banded Banded +} + +// At returns the value of the element at row i and column j of the transposed +// matrix, that is, row j and column i of the Banded field. 
+func (t TransposeBand) At(i, j int) float64 {
+ return t.Banded.At(j, i)
+}
+
+// Dims returns the dimensions of the transposed matrix.
+func (t TransposeBand) Dims() (r, c int) {
+ c, r = t.Banded.Dims()
+ return r, c
+}
+
+// T performs an implicit transpose by returning the Banded field.
+func (t TransposeBand) T() Matrix {
+ return t.Banded
+}
+
+// Bandwidth returns the lower and upper bandwidth values for
+// the transposed matrix.
+func (t TransposeBand) Bandwidth() (kl, ku int) {
+ kl, ku = t.Banded.Bandwidth()
+ return ku, kl
+}
+
+// TBand performs an implicit transpose by returning the Banded field.
+func (t TransposeBand) TBand() Banded {
+ return t.Banded
+}
+
+// Untranspose returns the Banded field.
+func (t TransposeBand) Untranspose() Matrix {
+ return t.Banded
+}
+
+// UntransposeBand returns the Banded field.
+func (t TransposeBand) UntransposeBand() Banded {
+ return t.Banded
+}
+
+// NewBandDense creates a new Band matrix with r rows and c columns. If data == nil,
+// a new slice is allocated for the backing slice. If len(data) == min(r, c+kl)*(kl+ku+1),
+// data is used as the backing slice, and changes to the elements of the returned
+// BandDense will be reflected in data. If neither of these is true, NewBandDense
+// will panic. kl must be at least zero and less than r, and ku must be at least
+// zero and less than c, otherwise NewBandDense will panic.
+// NewBandDense will panic if either r or c is zero.
+//
+// The data must be arranged in row-major order constructed by removing the zeros
+// from the rows outside the band and aligning the diagonals. For example, the matrix
+//
+// 1 2 3 0 0 0
+// 4 5 6 7 0 0
+// 0 8 9 10 11 0
+// 0 0 12 13 14 15
+// 0 0 0 16 17 18
+// 0 0 0 0 19 20
+//
+// becomes (* entries are never accessed)
+//
+// * 1 2 3
+// 4 5 6 7
+// 8 9 10 11
+// 12 13 14 15
+// 16 17 18 *
+// 19 20 * *
+//
+// which is passed to NewBandDense as []float64{*, 1, 2, 3, 4, ...} with kl=1 and ku=2.
+// Only the values in the band portion of the matrix are used.
+func NewBandDense(r, c, kl, ku int, data []float64) *BandDense {
+ if r <= 0 || c <= 0 || kl < 0 || ku < 0 {
+ if r == 0 || c == 0 {
+ panic(ErrZeroLength)
+ }
+ panic(ErrNegativeDimension)
+ }
+ if kl+1 > r || ku+1 > c {
+ panic(ErrBandwidth)
+ }
+ bc := kl + ku + 1
+ if data != nil && len(data) != min(r, c+kl)*bc {
+ panic(ErrShape)
+ }
+ if data == nil {
+ data = make([]float64, min(r, c+kl)*bc)
+ }
+ return &BandDense{
+ mat: blas64.Band{
+ Rows: r,
+ Cols: c,
+ KL: kl,
+ KU: ku,
+ Stride: bc,
+ Data: data,
+ },
+ }
+}
+
+// NewDiagonalRect is a convenience function that returns a diagonal matrix represented by a
+// BandDense. The length of data must be min(r, c) otherwise NewDiagonalRect will panic.
+func NewDiagonalRect(r, c int, data []float64) *BandDense {
+ return NewBandDense(r, c, 0, 0, data)
+}
+
+// Dims returns the number of rows and columns in the matrix.
+func (b *BandDense) Dims() (r, c int) {
+ return b.mat.Rows, b.mat.Cols
+}
+
+// Bandwidth returns the lower and upper bandwidths of the matrix.
+func (b *BandDense) Bandwidth() (kl, ku int) {
+ return b.mat.KL, b.mat.KU
+}
+
+// T performs an implicit transpose by returning the receiver inside a Transpose.
+func (b *BandDense) T() Matrix {
+ return Transpose{b}
+}
+
+// TBand performs an implicit transpose by returning the receiver inside a TransposeBand.
+func (b *BandDense) TBand() Banded {
+ return TransposeBand{b}
+}
+
+// RawBand returns the underlying blas64.Band used by the receiver.
+// Changes to elements in the receiver following the call will be reflected +// in returned blas64.Band. +func (b *BandDense) RawBand() blas64.Band { + return b.mat +} + +// SetRawBand sets the underlying blas64.Band used by the receiver. +// Changes to elements in the receiver following the call will be reflected +// in the input. +func (b *BandDense) SetRawBand(mat blas64.Band) { + b.mat = mat +} + +// IsEmpty returns whether the receiver is empty. Empty matrices can be the +// receiver for size-restricted operations. The receiver can be zeroed using Reset. +func (b *BandDense) IsEmpty() bool { + return b.mat.Stride == 0 +} + +// Reset empties the matrix so that it can be reused as the +// receiver of a dimensionally restricted operation. +// +// Reset should not be used when the matrix shares backing data. +// See the Reseter interface for more information. +func (b *BandDense) Reset() { + b.mat.Rows = 0 + b.mat.Cols = 0 + b.mat.KL = 0 + b.mat.KU = 0 + b.mat.Stride = 0 + b.mat.Data = b.mat.Data[:0] +} + +// DiagView returns the diagonal as a matrix backed by the original data. +func (b *BandDense) DiagView() Diagonal { + n := min(b.mat.Rows, b.mat.Cols) + return &DiagDense{ + mat: blas64.Vector{ + N: n, + Inc: b.mat.Stride, + Data: b.mat.Data[b.mat.KL : (n-1)*b.mat.Stride+b.mat.KL+1], + }, + } +} + +// DoNonZero calls the function fn for each of the non-zero elements of b. The function fn +// takes a row/column index and the element value of b at (i, j). +func (b *BandDense) DoNonZero(fn func(i, j int, v float64)) { + for i := 0; i < min(b.mat.Rows, b.mat.Cols+b.mat.KL); i++ { + for j := max(0, i-b.mat.KL); j < min(b.mat.Cols, i+b.mat.KU+1); j++ { + v := b.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } +} + +// DoRowNonZero calls the function fn for each of the non-zero elements of row i of b. The function fn +// takes a row/column index and the element value of b at (i, j). +func (b *BandDense) DoRowNonZero(i int, fn func(i, j int, v float64)) { + if i < 0 || b.mat.Rows <= i { + panic(ErrRowAccess) + } + for j := max(0, i-b.mat.KL); j < min(b.mat.Cols, i+b.mat.KU+1); j++ { + v := b.at(i, j) + if v != 0 { + fn(i, j, v) + } + } +} + +// DoColNonZero calls the function fn for each of the non-zero elements of column j of b. The function fn +// takes a row/column index and the element value of b at (i, j). +func (b *BandDense) DoColNonZero(j int, fn func(i, j int, v float64)) { + if j < 0 || b.mat.Cols <= j { + panic(ErrColAccess) + } + for i := 0; i < min(b.mat.Rows, b.mat.Cols+b.mat.KL); i++ { + if i-b.mat.KL <= j && j < i+b.mat.KU+1 { + v := b.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } +} + +// Zero sets all of the matrix elements to zero. +func (b *BandDense) Zero() { + m := b.mat.Rows + kL := b.mat.KL + nCol := b.mat.KU + 1 + kL + for i := 0; i < m; i++ { + l := max(0, kL-i) + u := min(nCol, m+kL-i) + zero(b.mat.Data[i*b.mat.Stride+l : i*b.mat.Stride+u]) + } +} + +// Norm returns the specified norm of the receiver. Valid norms are: +// +// 1 - The maximum absolute column sum +// 2 - The Frobenius norm, the square root of the sum of the squares of the elements +// Inf - The maximum absolute row sum +// +// Norm will panic with ErrNormOrder if an illegal norm is specified and with +// ErrZeroLength if the matrix has zero size. 
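+//
+// An editorial sketch (not part of the vendored gonum source; values assumed
+// for illustration): the infinity norm of a 3×3 tridiagonal matrix stored in
+// band form (kl = ku = 1), using the storage layout described at NewBandDense.
+//
+// b := NewBandDense(3, 3, 1, 1, []float64{
+// 0, 1, 2, // leading slot of row 0 is never accessed
+// 3, 4, 5,
+// 6, 7, 0, // trailing slot of row 2 is never accessed
+// })
+// inf := b.Norm(math.Inf(1)) // max(1+2, 3+4+5, 6+7) = 13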
+func (b *BandDense) Norm(norm float64) float64 {
+ if b.IsEmpty() {
+ panic(ErrZeroLength)
+ }
+ lnorm := normLapack(norm, false)
+ return lapack64.Langb(lnorm, b.mat)
+}
+
+// Trace returns the trace of the matrix.
+//
+// Trace will panic with ErrSquare if the matrix is not square and with
+// ErrZeroLength if the matrix has zero size.
+func (b *BandDense) Trace() float64 {
+ r, c := b.Dims()
+ if r != c {
+ panic(ErrSquare)
+ }
+ if b.IsEmpty() {
+ panic(ErrZeroLength)
+ }
+ rb := b.RawBand()
+ var tr float64
+ for i := 0; i < r; i++ {
+ tr += rb.Data[rb.KL+i*rb.Stride]
+ }
+ return tr
+}
+
+// MulVecTo computes B⋅x or Bᵀ⋅x storing the result into dst.
+func (b *BandDense) MulVecTo(dst *VecDense, trans bool, x Vector) {
+ m, n := b.Dims()
+ if trans {
+ m, n = n, m
+ }
+ if x.Len() != n {
+ panic(ErrShape)
+ }
+ dst.reuseAsNonZeroed(m)
+
+ t := blas.NoTrans
+ if trans {
+ t = blas.Trans
+ }
+
+ xMat, _ := untransposeExtract(x)
+ if xVec, ok := xMat.(*VecDense); ok {
+ if dst != xVec {
+ dst.checkOverlap(xVec.mat)
+ blas64.Gbmv(t, 1, b.mat, xVec.mat, 0, dst.mat)
+ } else {
+ xCopy := getVecDenseWorkspace(n, false)
+ xCopy.CloneFromVec(xVec)
+ blas64.Gbmv(t, 1, b.mat, xCopy.mat, 0, dst.mat)
+ putVecDenseWorkspace(xCopy)
+ }
+ } else {
+ xCopy := getVecDenseWorkspace(n, false)
+ xCopy.CloneFromVec(x)
+ blas64.Gbmv(t, 1, b.mat, xCopy.mat, 0, dst.mat)
+ putVecDenseWorkspace(xCopy)
+ }
+}
diff --git a/vendor/gonum.org/v1/gonum/mat/cdense.go b/vendor/gonum.org/v1/gonum/mat/cdense.go
new file mode 100644
index 0000000000..86f0423c58
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mat/cdense.go
@@ -0,0 +1,368 @@
+// Copyright ©2019 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mat
+
+import (
+ "math/cmplx"
+
+ "gonum.org/v1/gonum/blas/cblas128"
+)
+
+var (
+ cDense *CDense
+
+ _ CMatrix = cDense
+ _ allMatrix = cDense
+)
+
+// CDense is a dense matrix representation with complex data.
+type CDense struct {
+ mat cblas128.General
+
+ capRows, capCols int
+}
+
+// Dims returns the number of rows and columns in the matrix.
+func (m *CDense) Dims() (r, c int) {
+ return m.mat.Rows, m.mat.Cols
+}
+
+// Caps returns the number of rows and columns in the backing matrix.
+func (m *CDense) Caps() (r, c int) { return m.capRows, m.capCols }
+
+// H performs an implicit conjugate transpose by returning the receiver inside a
+// ConjTranspose.
+func (m *CDense) H() CMatrix {
+ return ConjTranspose{m}
+}
+
+// T performs an implicit transpose by returning the receiver inside a
+// CTranspose.
+func (m *CDense) T() CMatrix {
+ return CTranspose{m}
+}
+
+// Conj calculates the element-wise conjugate of a and stores the result in the
+// receiver.
+// Conj will panic if m and a do not have the same dimension unless m is empty.
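+//
+// An editorial sketch (not part of the vendored gonum source): conjugating a
+// 1×2 complex matrix into an empty receiver, which is sized automatically.
+//
+// a := NewCDense(1, 2, []complex128{1 + 2i, 3 - 4i})
+// var c CDense
+// c.Conj(a) // c now holds 1-2i and 3+4i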
+func (m *CDense) Conj(a CMatrix) {
+ ar, ac := a.Dims()
+ aU, aTrans, aConj := untransposeExtractCmplx(a)
+ m.reuseAsNonZeroed(ar, ac)
+
+ if arm, ok := a.(*CDense); ok {
+ amat := arm.mat
+ if m != aU {
+ m.checkOverlap(amat)
+ }
+ for ja, jm := 0, 0; ja < ar*amat.Stride; ja, jm = ja+amat.Stride, jm+m.mat.Stride {
+ for i, v := range amat.Data[ja : ja+ac] {
+ m.mat.Data[i+jm] = cmplx.Conj(v)
+ }
+ }
+ return
+ }
+
+ m.checkOverlapMatrix(aU)
+ if aTrans != aConj && m == aU {
+ // Only make workspace if the destination is transposed
+ // with respect to the source and they are the same
+ // matrix.
+ var restore func()
+ m, restore = m.isolatedWorkspace(aU)
+ defer restore()
+ }
+
+ for r := 0; r < ar; r++ {
+ for c := 0; c < ac; c++ {
+ m.set(r, c, cmplx.Conj(a.At(r, c)))
+ }
+ }
+}
+
+// Slice returns a new CMatrix that shares backing data with the receiver.
+// The returned matrix starts at {i,j} of the receiver and extends k-i rows
+// and l-j columns. The final row in the resulting matrix is k-1 and the
+// final column is l-1.
+// Slice panics with ErrIndexOutOfRange if the slice is outside the capacity
+// of the receiver.
+func (m *CDense) Slice(i, k, j, l int) CMatrix {
+ return m.slice(i, k, j, l)
+}
+
+func (m *CDense) slice(i, k, j, l int) *CDense {
+ mr, mc := m.Caps()
+ if i < 0 || mr <= i || j < 0 || mc <= j || k < i || mr < k || l < j || mc < l {
+ if i == k || j == l {
+ panic(ErrZeroLength)
+ }
+ panic(ErrIndexOutOfRange)
+ }
+ t := *m
+ t.mat.Data = t.mat.Data[i*t.mat.Stride+j : (k-1)*t.mat.Stride+l]
+ t.mat.Rows = k - i
+ t.mat.Cols = l - j
+ t.capRows -= i
+ t.capCols -= j
+ return &t
+}
+
+// NewCDense creates a new complex Dense matrix with r rows and c columns.
+// If data == nil, a new slice is allocated for the backing slice.
+// If len(data) == r*c, data is used as the backing slice, and changes to the
+// elements of the returned CDense will be reflected in data.
+// If neither of these is true, NewCDense will panic.
+// NewCDense will panic if either r or c is zero.
+//
+// The data must be arranged in row-major order, i.e. the (i*c + j)-th
+// element in the data slice is the {i, j}-th element in the matrix.
+func NewCDense(r, c int, data []complex128) *CDense {
+ if r <= 0 || c <= 0 {
+ if r == 0 || c == 0 {
+ panic(ErrZeroLength)
+ }
+ panic("mat: negative dimension")
+ }
+ if data != nil && r*c != len(data) {
+ panic(ErrShape)
+ }
+ if data == nil {
+ data = make([]complex128, r*c)
+ }
+ return &CDense{
+ mat: cblas128.General{
+ Rows: r,
+ Cols: c,
+ Stride: c,
+ Data: data,
+ },
+ capRows: r,
+ capCols: c,
+ }
+}
+
+// ReuseAs changes the receiver if it IsEmpty() to be of size r×c.
+//
+// ReuseAs re-uses the backing data slice if it has sufficient capacity,
+// otherwise a new slice is allocated. The backing data is zero on return.
+//
+// ReuseAs panics if the receiver is not empty, and panics if
+// the input sizes are less than one. To empty the receiver for re-use,
+// Reset should be used.
+func (m *CDense) ReuseAs(r, c int) {
+ if r <= 0 || c <= 0 {
+ if r == 0 || c == 0 {
+ panic(ErrZeroLength)
+ }
+ panic(ErrNegativeDimension)
+ }
+ if !m.IsEmpty() {
+ panic(ErrReuseNonEmpty)
+ }
+ m.reuseAsZeroed(r, c)
+}
+
+// reuseAsNonZeroed resizes an empty matrix to a r×c matrix,
+// or checks that a non-empty matrix is r×c.
+//
+// reuseAsNonZeroed must be kept in sync with reuseAsZeroed.
+func (m *CDense) reuseAsNonZeroed(r, c int) {
+ if m.mat.Rows > m.capRows || m.mat.Cols > m.capCols {
+ // Panic as a string, not a mat.Error.
+ panic(badCap)
+ }
+ if r == 0 || c == 0 {
+ panic(ErrZeroLength)
+ }
+ if m.IsEmpty() {
+ m.mat = cblas128.General{
+ Rows: r,
+ Cols: c,
+ Stride: c,
+ Data: useC(m.mat.Data, r*c),
+ }
+ m.capRows = r
+ m.capCols = c
+ return
+ }
+ if r != m.mat.Rows || c != m.mat.Cols {
+ panic(ErrShape)
+ }
+}
+
+func (m *CDense) reuseAsZeroed(r, c int) {
+ // This must be kept in sync with reuseAsNonZeroed.
+ if m.mat.Rows > m.capRows || m.mat.Cols > m.capCols {
+ // Panic as a string, not a mat.Error.
+ panic(badCap)
+ }
+ if r == 0 || c == 0 {
+ panic(ErrZeroLength)
+ }
+ if m.IsEmpty() {
+ m.mat = cblas128.General{
+ Rows: r,
+ Cols: c,
+ Stride: c,
+ Data: useZeroedC(m.mat.Data, r*c),
+ }
+ m.capRows = r
+ m.capCols = c
+ return
+ }
+ if r != m.mat.Rows || c != m.mat.Cols {
+ panic(ErrShape)
+ }
+ m.Zero()
+}
+
+// isolatedWorkspace returns a new dense matrix w with the size of a and
+// returns a callback to defer which performs cleanup at the return of the call.
+// This should be used when a method receiver is the same pointer as an input argument.
+func (m *CDense) isolatedWorkspace(a CMatrix) (w *CDense, restore func()) {
+ r, c := a.Dims()
+ if r == 0 || c == 0 {
+ panic(ErrZeroLength)
+ }
+ w = getCDenseWorkspace(r, c, false)
+ return w, func() {
+ m.Copy(w)
+ putCDenseWorkspace(w)
+ }
+}
+
+// Reset zeros the dimensions of the matrix so that it can be reused as the
+// receiver of a dimensionally restricted operation.
+//
+// Reset should not be used when the matrix shares backing data.
+// See the Reseter interface for more information.
+func (m *CDense) Reset() {
+ // Row, Cols and Stride must be zeroed in unison.
+ m.mat.Rows, m.mat.Cols, m.mat.Stride = 0, 0, 0
+ m.capRows, m.capCols = 0, 0
+ m.mat.Data = m.mat.Data[:0]
+}
+
+// IsEmpty returns whether the receiver is empty. Empty matrices can be the
+// receiver for size-restricted operations. The receiver can be zeroed using Reset.
+func (m *CDense) IsEmpty() bool {
+ // It must be the case that m.Dims() returns
+ // zeros in this case. See comment in Reset().
+ return m.mat.Stride == 0
+}
+
+// Zero sets all of the matrix elements to zero.
+func (m *CDense) Zero() {
+ r := m.mat.Rows
+ c := m.mat.Cols
+ for i := 0; i < r; i++ {
+ zeroC(m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c])
+ }
+}
+
+// Copy makes a copy of elements of a into the receiver. It is similar to the
+// built-in copy; it copies as much as the overlap between the two matrices and
+// returns the number of rows and columns it copied. If a aliases the receiver
+// and is a transposed Dense or VecDense, with a non-unitary increment, Copy will
+// panic.
+//
+// See the Copier interface for more information.
+func (m *CDense) Copy(a CMatrix) (r, c int) {
+ r, c = a.Dims()
+ if a == m {
+ return r, c
+ }
+ r = min(r, m.mat.Rows)
+ c = min(c, m.mat.Cols)
+ if r == 0 || c == 0 {
+ return 0, 0
+ }
+ // TODO(btracey): Check for overlap when complex version exists.
+ // TODO(btracey): Add fast-paths.
+ for i := 0; i < r; i++ {
+ for j := 0; j < c; j++ {
+ m.set(i, j, a.At(i, j))
+ }
+ }
+ return r, c
+}
+
+// SetRawCMatrix sets the underlying cblas128.General used by the receiver.
+// Changes to elements in the receiver following the call will be reflected
+// in b.
+func (m *CDense) SetRawCMatrix(b cblas128.General) {
+ m.capRows, m.capCols = b.Rows, b.Cols
+ m.mat = b
+}
+
+// RawCMatrix returns the underlying cblas128.General used by the receiver.
+// Changes to elements in the receiver following the call will be reflected
+// in returned cblas128.General.
+func (m *CDense) RawCMatrix() cblas128.General { return m.mat } + +// Grow returns the receiver expanded by r rows and c columns. If the dimensions +// of the expanded matrix are outside the capacities of the receiver a new +// allocation is made, otherwise not. Note the receiver itself is not modified +// during the call to Grow. +func (m *CDense) Grow(r, c int) CMatrix { + if r < 0 || c < 0 { + panic(ErrIndexOutOfRange) + } + if r == 0 && c == 0 { + return m + } + + r += m.mat.Rows + c += m.mat.Cols + + var t CDense + switch { + case m.mat.Rows == 0 || m.mat.Cols == 0: + t.mat = cblas128.General{ + Rows: r, + Cols: c, + Stride: c, + // We zero because we don't know how the matrix will be used. + // In other places, the mat is immediately filled with a result; + // this is not the case here. + Data: useZeroedC(m.mat.Data, r*c), + } + case r > m.capRows || c > m.capCols: + cr := max(r, m.capRows) + cc := max(c, m.capCols) + t.mat = cblas128.General{ + Rows: r, + Cols: c, + Stride: cc, + Data: make([]complex128, cr*cc), + } + t.capRows = cr + t.capCols = cc + // Copy the complete matrix over to the new matrix. + // Including elements not currently visible. Use a temporary structure + // to avoid modifying the receiver. + var tmp CDense + tmp.mat = cblas128.General{ + Rows: m.mat.Rows, + Cols: m.mat.Cols, + Stride: m.mat.Stride, + Data: m.mat.Data, + } + tmp.capRows = m.capRows + tmp.capCols = m.capCols + t.Copy(&tmp) + return &t + default: + t.mat = cblas128.General{ + Data: m.mat.Data[:(r-1)*m.mat.Stride+c], + Rows: r, + Cols: c, + Stride: m.mat.Stride, + } + } + t.capRows = r + t.capCols = c + return &t +} diff --git a/vendor/gonum.org/v1/gonum/mat/cholesky.go b/vendor/gonum.org/v1/gonum/mat/cholesky.go new file mode 100644 index 0000000000..f11948d0f8 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/cholesky.go @@ -0,0 +1,1203 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack/lapack64" +) + +const ( + badTriangle = "mat: invalid triangle" + badCholesky = "mat: invalid Cholesky factorization" +) + +var ( + _ Matrix = (*Cholesky)(nil) + _ Symmetric = (*Cholesky)(nil) + + _ Matrix = (*BandCholesky)(nil) + _ Symmetric = (*BandCholesky)(nil) + _ Banded = (*BandCholesky)(nil) + _ SymBanded = (*BandCholesky)(nil) + + _ Matrix = (*PivotedCholesky)(nil) + _ Symmetric = (*PivotedCholesky)(nil) +) + +// Cholesky is a symmetric positive definite matrix represented by its +// Cholesky decomposition. +// +// The decomposition can be constructed using the Factorize method. The +// factorization itself can be extracted using the UTo or LTo methods, and the +// original symmetric matrix can be recovered with ToSym. +// +// Note that this matrix representation is useful for certain operations, in +// particular finding solutions to linear equations. It is very inefficient +// at other operations, in particular At is slow. +// +// Cholesky methods may only be called on a value that has been successfully +// initialized by a call to Factorize that has returned true. Calls to methods +// of an unsuccessful Cholesky factorization will panic. +type Cholesky struct { + // The chol pointer must never be retained as a pointer outside the Cholesky + // struct, either by returning chol outside the struct or by setting it to + // a pointer coming from outside. 
The same prohibition applies to the data
+ // slice within chol.
+ chol *TriDense
+ cond float64
+}
+
+// updateCond updates the condition number of the Cholesky decomposition. If
+// norm > 0, then that norm is used as the norm of the original matrix A, otherwise
+// the norm is estimated from the decomposition.
+func (c *Cholesky) updateCond(norm float64) {
+ n := c.chol.mat.N
+ work := getFloat64s(3*n, false)
+ defer putFloat64s(work)
+ if norm < 0 {
+ // This is an approximation. By the definition of a norm,
+ // |AB| <= |A| |B|.
+ // Since A = Uᵀ*U, we get for the condition number κ that
+ // κ(A) := |A| |A^-1| = |Uᵀ*U| |A^-1| <= |Uᵀ| |U| |A^-1|,
+ // so this will overestimate the condition number somewhat.
+ // The norm of the original factorized matrix cannot be stored
+ // because of update possibilities.
+ unorm := lapack64.Lantr(CondNorm, c.chol.mat, work)
+ lnorm := lapack64.Lantr(CondNormTrans, c.chol.mat, work)
+ norm = unorm * lnorm
+ }
+ sym := c.chol.asSymBlas()
+ iwork := getInts(n, false)
+ v := lapack64.Pocon(sym, norm, work, iwork)
+ putInts(iwork)
+ c.cond = 1 / v
+}
+
+// Dims returns the dimensions of the matrix.
+func (ch *Cholesky) Dims() (r, c int) {
+ n := ch.SymmetricDim()
+ return n, n
+}
+
+// At returns the element at row i, column j.
+func (c *Cholesky) At(i, j int) float64 {
+ n := c.SymmetricDim()
+ if uint(i) >= uint(n) {
+ panic(ErrRowAccess)
+ }
+ if uint(j) >= uint(n) {
+ panic(ErrColAccess)
+ }
+
+ var val float64
+ for k := 0; k <= min(i, j); k++ {
+ val += c.chol.at(k, i) * c.chol.at(k, j)
+ }
+ return val
+}
+
+// T returns the receiver, the transpose of a symmetric matrix.
+func (c *Cholesky) T() Matrix {
+ return c
+}
+
+// SymmetricDim implements the Symmetric interface and returns the number of rows
+// in the matrix (this is also the number of columns).
+func (c *Cholesky) SymmetricDim() int {
+ if c.chol == nil {
+ return 0
+ }
+ n, _ := c.chol.Triangle()
+ return n
+}
+
+// Cond returns the condition number of the factorized matrix.
+func (c *Cholesky) Cond() float64 {
+ if !c.valid() {
+ panic(badCholesky)
+ }
+ return c.cond
+}
+
+// Factorize calculates the Cholesky decomposition of the matrix A and returns
+// whether the matrix is positive definite. If Factorize returns false, the
+// factorization must not be used.
+func (c *Cholesky) Factorize(a Symmetric) (ok bool) {
+ n := a.SymmetricDim()
+ if c.chol == nil {
+ c.chol = NewTriDense(n, Upper, nil)
+ } else {
+ c.chol.Reset()
+ c.chol.reuseAsNonZeroed(n, Upper)
+ }
+ copySymIntoTriangle(c.chol, a)
+
+ sym := c.chol.asSymBlas()
+ work := getFloat64s(c.chol.mat.N, false)
+ norm := lapack64.Lansy(CondNorm, sym, work)
+ putFloat64s(work)
+ _, ok = lapack64.Potrf(sym)
+ if ok {
+ c.updateCond(norm)
+ } else {
+ c.Reset()
+ }
+ return ok
+}
+
+// Reset resets the factorization so that it can be reused as the receiver of a
+// dimensionally restricted operation.
+func (c *Cholesky) Reset() {
+ if c.chol != nil {
+ c.chol.Reset()
+ }
+ c.cond = math.Inf(1)
+}
+
+// IsEmpty returns whether the receiver is empty. Empty matrices can be the
+// receiver for size-restricted operations. The receiver can be emptied using
+// Reset.
+func (c *Cholesky) IsEmpty() bool {
+ return c.chol == nil || c.chol.IsEmpty()
+}
+
+// SetFromU sets the Cholesky decomposition from the given triangular matrix.
+// SetFromU panics if t is not upper triangular. If the receiver is empty it
+// is resized to be n×n, the size of t. If the receiver is non-empty, SetFromU
+// panics if it is not of size n×n.
Note that t is copied into, not stored inside, the +// receiver. +func (c *Cholesky) SetFromU(t Triangular) { + n, kind := t.Triangle() + if kind != Upper { + panic("cholesky: matrix must be upper triangular") + } + if c.chol == nil { + c.chol = NewTriDense(n, Upper, nil) + } else { + c.chol.reuseAsNonZeroed(n, Upper) + } + c.chol.Copy(t) + c.updateCond(-1) +} + +// Clone makes a copy of the input Cholesky into the receiver, overwriting the +// previous value of the receiver. Clone does not place any restrictions on receiver +// shape. Clone panics if the input Cholesky is not the result of a valid decomposition. +func (c *Cholesky) Clone(chol *Cholesky) { + if !chol.valid() { + panic(badCholesky) + } + n := chol.SymmetricDim() + if c.chol == nil { + c.chol = NewTriDense(n, Upper, nil) + } else { + c.chol = NewTriDense(n, Upper, use(c.chol.mat.Data, n*n)) + } + c.chol.Copy(chol.chol) + c.cond = chol.cond +} + +// Det returns the determinant of the matrix that has been factorized. +func (c *Cholesky) Det() float64 { + if !c.valid() { + panic(badCholesky) + } + return math.Exp(c.LogDet()) +} + +// LogDet returns the log of the determinant of the matrix that has been factorized. +func (c *Cholesky) LogDet() float64 { + if !c.valid() { + panic(badCholesky) + } + var det float64 + for i := 0; i < c.chol.mat.N; i++ { + det += 2 * math.Log(c.chol.mat.Data[i*c.chol.mat.Stride+i]) + } + return det +} + +// SolveTo finds the matrix X that solves A * X = B where A is represented +// by the Cholesky decomposition. The result is stored in-place into dst. +// If the Cholesky decomposition is singular or near-singular a Condition error +// is returned. See the documentation for Condition for more information. +func (c *Cholesky) SolveTo(dst *Dense, b Matrix) error { + if !c.valid() { + panic(badCholesky) + } + n := c.chol.mat.N + bm, bn := b.Dims() + if n != bm { + panic(ErrShape) + } + + dst.reuseAsNonZeroed(bm, bn) + if b != dst { + dst.Copy(b) + } + lapack64.Potrs(c.chol.mat, dst.mat) + if c.cond > ConditionTolerance { + return Condition(c.cond) + } + return nil +} + +// SolveCholTo finds the matrix X that solves A * X = B where A and B are represented +// by their Cholesky decompositions a and b. The result is stored in-place into +// dst. +// If the Cholesky decomposition is singular or near-singular a Condition error +// is returned. See the documentation for Condition for more information. +func (a *Cholesky) SolveCholTo(dst *Dense, b *Cholesky) error { + if !a.valid() || !b.valid() { + panic(badCholesky) + } + bn := b.chol.mat.N + if a.chol.mat.N != bn { + panic(ErrShape) + } + + dst.reuseAsZeroed(bn, bn) + dst.Copy(b.chol.T()) + blas64.Trsm(blas.Left, blas.Trans, 1, a.chol.mat, dst.mat) + blas64.Trsm(blas.Left, blas.NoTrans, 1, a.chol.mat, dst.mat) + blas64.Trmm(blas.Right, blas.NoTrans, 1, b.chol.mat, dst.mat) + if a.cond > ConditionTolerance { + return Condition(a.cond) + } + return nil +} + +// SolveVecTo finds the vector x that solves A * x = b where A is represented +// by the Cholesky decomposition. The result is stored in-place into +// dst. +// If the Cholesky decomposition is singular or near-singular a Condition error +// is returned. See the documentation for Condition for more information. 
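+//
+// An editorial sketch (not part of the vendored gonum source; values assumed
+// for illustration): factorizing a 2×2 positive definite matrix and solving
+// A * x = b.
+//
+// a := NewSymDense(2, []float64{4, 2, 2, 3})
+// var ch Cholesky
+// if ch.Factorize(a) {
+// var x VecDense
+// err := ch.SolveVecTo(&x, NewVecDense(2, []float64{2, 1}))
+// // x now holds [0.5 0]ᵀ; err reports near-singularity via Condition.
+// }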
+func (c *Cholesky) SolveVecTo(dst *VecDense, b Vector) error {
+ if !c.valid() {
+ panic(badCholesky)
+ }
+ n := c.chol.mat.N
+ if br, bc := b.Dims(); br != n || bc != 1 {
+ panic(ErrShape)
+ }
+ switch rv := b.(type) {
+ default:
+ dst.reuseAsNonZeroed(n)
+ return c.SolveTo(dst.asDense(), b)
+ case RawVectorer:
+ bmat := rv.RawVector()
+ if dst != b {
+ dst.checkOverlap(bmat)
+ }
+ dst.reuseAsNonZeroed(n)
+ if dst != b {
+ dst.CopyVec(b)
+ }
+ lapack64.Potrs(c.chol.mat, dst.asGeneral())
+ if c.cond > ConditionTolerance {
+ return Condition(c.cond)
+ }
+ return nil
+ }
+}
+
+// RawU returns the Triangular matrix used to store the Cholesky factorization
+// of the original matrix A. If the returned matrix is modified, the
+// factorization is invalid and should not be used.
+//
+// If Factorize has not been called, RawU will return nil.
+func (c *Cholesky) RawU() Triangular {
+ if !c.valid() {
+ return nil
+ }
+ return c.chol
+}
+
+// UTo stores into dst the n×n upper triangular matrix U from a Cholesky
+// decomposition
+//
+// A = Uᵀ * U.
+//
+// If dst is empty, it is resized to be an n×n upper triangular matrix. When dst
+// is non-empty, UTo panics if dst is not n×n or not Upper. UTo will also panic
+// if the receiver does not contain a successful factorization.
+func (c *Cholesky) UTo(dst *TriDense) {
+ if !c.valid() {
+ panic(badCholesky)
+ }
+ n := c.chol.mat.N
+ if dst.IsEmpty() {
+ dst.ReuseAsTri(n, Upper)
+ } else {
+ n2, kind := dst.Triangle()
+ if n != n2 {
+ panic(ErrShape)
+ }
+ if kind != Upper {
+ panic(ErrTriangle)
+ }
+ }
+ dst.Copy(c.chol)
+}
+
+// LTo stores into dst the n×n lower triangular matrix L from a Cholesky
+// decomposition
+//
+// A = L * Lᵀ.
+//
+// If dst is empty, it is resized to be an n×n lower triangular matrix. When dst
+// is non-empty, LTo panics if dst is not n×n or not Lower. LTo will also panic
+// if the receiver does not contain a successful factorization.
+func (c *Cholesky) LTo(dst *TriDense) {
+ if !c.valid() {
+ panic(badCholesky)
+ }
+ n := c.chol.mat.N
+ if dst.IsEmpty() {
+ dst.ReuseAsTri(n, Lower)
+ } else {
+ n2, kind := dst.Triangle()
+ if n != n2 {
+ panic(ErrShape)
+ }
+ if kind != Lower {
+ panic(ErrTriangle)
+ }
+ }
+ dst.Copy(c.chol.TTri())
+}
+
+// ToSym reconstructs the original positive definite matrix from its
+// Cholesky decomposition, storing the result into dst. If dst is
+// empty it is resized to be n×n. If dst is non-empty, ToSym panics
+// if dst is not of size n×n. ToSym will also panic if the receiver
+// does not contain a successful factorization.
+func (c *Cholesky) ToSym(dst *SymDense) {
+ if !c.valid() {
+ panic(badCholesky)
+ }
+ n := c.chol.mat.N
+ if dst.IsEmpty() {
+ dst.ReuseAsSym(n)
+ } else {
+ n2 := dst.SymmetricDim()
+ if n != n2 {
+ panic(ErrShape)
+ }
+ }
+ // Create a TriDense representing the Cholesky factor U with dst's
+ // backing slice.
+ // Operations on u are reflected in dst.
+ u := &TriDense{
+ mat: blas64.Triangular{
+ Uplo: blas.Upper,
+ Diag: blas.NonUnit,
+ N: n,
+ Data: dst.mat.Data,
+ Stride: dst.mat.Stride,
+ },
+ cap: n,
+ }
+ u.Copy(c.chol)
+ // Compute the product Uᵀ*U using the algorithm from LAPACK/TESTING/LIN/dpot01.f
+ a := u.mat.Data
+ lda := u.mat.Stride
+ bi := blas64.Implementation()
+ for k := n - 1; k >= 0; k-- {
+ a[k*lda+k] = bi.Ddot(k+1, a[k:], lda, a[k:], lda)
+ if k > 0 {
+ bi.Dtrmv(blas.Upper, blas.Trans, blas.NonUnit, k, a, lda, a[k:], lda)
+ }
+ }
+}
+
+// InverseTo computes the inverse of the matrix represented by its Cholesky
+// factorization and stores the result into dst. If the factorized
+// matrix is ill-conditioned, a Condition error will be returned.
+// Note that matrix inversion is numerically unstable, and should generally be
+// avoided where possible, for example by using the Solve routines.
+func (c *Cholesky) InverseTo(dst *SymDense) error {
+ if !c.valid() {
+ panic(badCholesky)
+ }
+ dst.reuseAsNonZeroed(c.chol.mat.N)
+ // Create a TriDense representing the Cholesky factor U with the backing
+ // slice from dst.
+ // Operations on u are reflected in dst.
+ u := &TriDense{
+ mat: blas64.Triangular{
+ Uplo: blas.Upper,
+ Diag: blas.NonUnit,
+ N: dst.mat.N,
+ Data: dst.mat.Data,
+ Stride: dst.mat.Stride,
+ },
+ cap: dst.mat.N,
+ }
+ u.Copy(c.chol)
+
+ _, ok := lapack64.Potri(u.mat)
+ if !ok {
+ return Condition(math.Inf(1))
+ }
+ if c.cond > ConditionTolerance {
+ return Condition(c.cond)
+ }
+ return nil
+}
+
+// Scale multiplies the original matrix A by a positive constant using
+// its Cholesky decomposition, storing the result in-place into the receiver.
+// That is, if the original Cholesky factorization is
+//
+// Uᵀ * U = A
+//
+// the updated factorization is
+//
+// U'ᵀ * U' = f A = A'
+//
+// Scale panics if the constant is non-positive, or if the receiver is non-empty
+// and is of a different size from the input.
+func (c *Cholesky) Scale(f float64, orig *Cholesky) {
+ if !orig.valid() {
+ panic(badCholesky)
+ }
+ if f <= 0 {
+ panic("cholesky: scaling by a non-positive constant")
+ }
+ n := orig.SymmetricDim()
+ if c.chol == nil {
+ c.chol = NewTriDense(n, Upper, nil)
+ } else if c.chol.mat.N != n {
+ panic(ErrShape)
+ }
+ c.chol.ScaleTri(math.Sqrt(f), orig.chol)
+ c.cond = orig.cond // Scaling by a positive constant does not change the condition number.
+}
+
+// ExtendVecSym computes the Cholesky decomposition of the original matrix A,
+// whose Cholesky decomposition is in a, extended by the n×1 vector v according to
+//
+// [A w]
+// [w' k]
+//
+// where k = v[n-1] and w = v[:n-1]. The result is stored into the receiver.
+// In order for the updated matrix to be positive definite, it must be the case
+// that k > w' A^-1 w. If this condition does not hold then ExtendVecSym will
+// return false and the receiver will not be updated.
+//
+// ExtendVecSym will panic if v.Len() != a.SymmetricDim()+1 or if a does not contain
+// a valid decomposition.
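+//
+// An editorial sketch (not part of the vendored gonum source; values assumed
+// for illustration): extending the factorization of the 1×1 matrix A = [4]
+// by v = (w, k) = (2, 5).
+//
+// var ch Cholesky
+// ch.Factorize(NewSymDense(1, []float64{4}))
+// var ext Cholesky
+// ok := ext.ExtendVecSym(&ch, NewVecDense(2, []float64{2, 5}))
+// // ok is true since k = 5 > w'*A^-1*w = 1; ext factors [[4 2], [2 5]].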
+func (c *Cholesky) ExtendVecSym(a *Cholesky, v Vector) (ok bool) {
+ n := a.SymmetricDim()
+
+ if v.Len() != n+1 {
+ panic(badSliceLength)
+ }
+ if !a.valid() {
+ panic(badCholesky)
+ }
+
+ // The algorithm is commented here, but see also
+ // https://math.stackexchange.com/questions/955874/cholesky-factor-when-adding-a-row-and-column-to-already-factorized-matrix
+ // We have A and want to compute the Cholesky of
+ // [A w]
+ // [w' k]
+ // We want
+ // [U c]
+ // [0 d]
+ // to be the updated Cholesky, and so it must be that
+ // [A w] = [U' 0] [U c]
+ // [w' k] [c' d] [0 d]
+ // Thus, we need
+ // 1) A = U'U (true by the original decomposition being valid),
+ // 2) U' * c = w => c = U'^-1 w
+ // 3) c'*c + d'*d = k => d = sqrt(k-c'*c)
+
+ // First, compute c = U'^-1 w
+ w := NewVecDense(n, nil)
+ w.CopyVec(v)
+ k := v.At(n, 0)
+
+ var t VecDense
+ _ = t.SolveVec(a.chol.T(), w)
+
+ dot := Dot(&t, &t)
+ if dot >= k {
+ return false
+ }
+ d := math.Sqrt(k - dot)
+
+ newU := NewTriDense(n+1, Upper, nil)
+ newU.Copy(a.chol)
+ for i := 0; i < n; i++ {
+ newU.SetTri(i, n, t.At(i, 0))
+ }
+ newU.SetTri(n, n, d)
+ c.chol = newU
+ c.updateCond(-1)
+ return true
+}
+
+// SymRankOne performs a rank-1 update of the original matrix A and refactorizes
+// its Cholesky factorization, storing the result into the receiver. That is, if
+// in the original Cholesky factorization
+//
+// Uᵀ * U = A,
+//
+// in the updated factorization
+//
+// U'ᵀ * U' = A + alpha * x * xᵀ = A'.
+//
+// Note that when alpha is negative, the updating problem may be ill-conditioned
+// and the results may be inaccurate, or the updated matrix A' may not be
+// positive definite and not have a Cholesky factorization. SymRankOne returns
+// whether the updated matrix A' is positive definite. If the update fails
+// the receiver is left unchanged.
+//
+// SymRankOne updates a Cholesky factorization in O(n²) time. The Cholesky
+// factorization computation from scratch is O(n³).
+func (c *Cholesky) SymRankOne(orig *Cholesky, alpha float64, x Vector) (ok bool) {
+ if !orig.valid() {
+ panic(badCholesky)
+ }
+ n := orig.SymmetricDim()
+ if r, c := x.Dims(); r != n || c != 1 {
+ panic(ErrShape)
+ }
+ if orig != c {
+ if c.chol == nil {
+ c.chol = NewTriDense(n, Upper, nil)
+ } else if c.chol.mat.N != n {
+ panic(ErrShape)
+ }
+ c.chol.Copy(orig.chol)
+ }
+
+ if alpha == 0 {
+ return true
+ }
+
+ // Algorithms for updating and downdating the Cholesky factorization are
+ // described, for example, in
+ // - J. J. Dongarra, J. R. Bunch, C. B. Moler, G. W. Stewart: LINPACK
+ // Users' Guide. SIAM (1979), pages 10.10--10.14
+ // or
+ // - P. E. Gill, G. H. Golub, W. Murray, and M. A. Saunders: Methods for
+ // modifying matrix factorizations. Mathematics of Computation 28(126)
+ // (1974), Method C3 on page 521
+ //
+ // The implementation is based on LINPACK code
+ // http://www.netlib.org/linpack/dchud.f
+ // http://www.netlib.org/linpack/dchdd.f
+ // and
+ // https://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=2&t=2646
+ //
+ // According to http://icl.cs.utk.edu/lapack-forum/archives/lapack/msg00301.html
+ // LINPACK is released under BSD license.
+ //
+ // See also:
+ // - M. A. Saunders: Large-scale Linear Programming Using the Cholesky
+ // Factorization. Technical Report Stanford University (1972)
+ // http://i.stanford.edu/pub/cstr/reports/cs/tr/72/252/CS-TR-72-252.pdf
+ // - Matthias Seeger: Low rank updates for the Cholesky decomposition.
+	//   EPFL Technical Report 161468 (2004)
+	//   http://infoscience.epfl.ch/record/161468
+
+	work := getFloat64s(n, false)
+	defer putFloat64s(work)
+	var xmat blas64.Vector
+	if rv, ok := x.(RawVectorer); ok {
+		xmat = rv.RawVector()
+	} else {
+		// Use a value, not a nil pointer, so that CopyVec has a usable receiver.
+		var tmp VecDense
+		tmp.CopyVec(x)
+		xmat = tmp.RawVector()
+	}
+	blas64.Copy(xmat, blas64.Vector{N: n, Data: work, Inc: 1})
+
+	if alpha > 0 {
+		// Compute rank-1 update.
+		if alpha != 1 {
+			blas64.Scal(math.Sqrt(alpha), blas64.Vector{N: n, Data: work, Inc: 1})
+		}
+		umat := c.chol.mat
+		stride := umat.Stride
+		for i := 0; i < n; i++ {
+			// Compute parameters of the Givens matrix that zeroes
+			// the i-th element of x.
+			c, s, r, _ := blas64.Rotg(umat.Data[i*stride+i], work[i])
+			if r < 0 {
+				// Multiply by -1 to have positive diagonal
+				// elements.
+				r *= -1
+				c *= -1
+				s *= -1
+			}
+			umat.Data[i*stride+i] = r
+			if i < n-1 {
+				// Multiply the extended factorization matrix by
+				// the Givens matrix from the left. Only
+				// the i-th row and x are modified.
+				blas64.Rot(
+					blas64.Vector{N: n - i - 1, Data: umat.Data[i*stride+i+1 : i*stride+n], Inc: 1},
+					blas64.Vector{N: n - i - 1, Data: work[i+1 : n], Inc: 1},
+					c, s)
+			}
+		}
+		c.updateCond(-1)
+		return true
+	}
+
+	// Compute rank-1 downdate.
+	alpha = math.Sqrt(-alpha)
+	if alpha != 1 {
+		blas64.Scal(alpha, blas64.Vector{N: n, Data: work, Inc: 1})
+	}
+	// Solve Uᵀ * p = x storing the result into work.
+	ok = lapack64.Trtrs(blas.Trans, c.chol.RawTriangular(), blas64.General{
+		Rows:   n,
+		Cols:   1,
+		Stride: 1,
+		Data:   work,
+	})
+	if !ok {
+		// The original matrix is singular. Should not happen, because
+		// the factorization is valid.
+		panic(badCholesky)
+	}
+	norm := blas64.Nrm2(blas64.Vector{N: n, Data: work, Inc: 1})
+	if norm >= 1 {
+		// The updated matrix is not positive definite.
+		return false
+	}
+	norm = math.Sqrt((1 + norm) * (1 - norm))
+	cos := getFloat64s(n, false)
+	defer putFloat64s(cos)
+	sin := getFloat64s(n, false)
+	defer putFloat64s(sin)
+	for i := n - 1; i >= 0; i-- {
+		// Compute parameters of Givens matrices that zero elements of p
+		// backwards.
+		cos[i], sin[i], norm, _ = blas64.Rotg(norm, work[i])
+		if norm < 0 {
+			norm *= -1
+			cos[i] *= -1
+			sin[i] *= -1
+		}
+	}
+	workMat := getTriDenseWorkspace(c.chol.mat.N, c.chol.triKind(), false)
+	defer putTriWorkspace(workMat)
+	workMat.Copy(c.chol)
+	umat := workMat.mat
+	stride := workMat.mat.Stride
+	for i := n - 1; i >= 0; i-- {
+		work[i] = 0
+		// Apply Givens matrices to U.
+		blas64.Rot(
+			blas64.Vector{N: n - i, Data: work[i:n], Inc: 1},
+			blas64.Vector{N: n - i, Data: umat.Data[i*stride+i : i*stride+n], Inc: 1},
+			cos[i], sin[i])
+		if umat.Data[i*stride+i] == 0 {
+			// The matrix is singular (may rarely happen due to
+			// floating-point effects?).
+			ok = false
+		} else if umat.Data[i*stride+i] < 0 {
+			// Diagonal elements should be positive. If it happens
+			// that on the i-th row the diagonal is negative,
+			// multiply U from the left by an identity matrix that
+			// has -1 on the i-th row.
+			blas64.Scal(-1, blas64.Vector{N: n - i, Data: umat.Data[i*stride+i : i*stride+n], Inc: 1})
+		}
+	}
+	if ok {
+		c.chol.Copy(workMat)
+		c.updateCond(-1)
+	}
+	return ok
+}
+
+func (c *Cholesky) valid() bool {
+	return c.chol != nil && !c.chol.IsEmpty()
+}
+
+// BandCholesky is a symmetric positive-definite band matrix represented by its
+// Cholesky decomposition.
+//
+// Note that this matrix representation is useful for certain operations, in
+// particular finding solutions to linear equations. 
It is very inefficient at +// other operations, in particular At is slow. +// +// BandCholesky methods may only be called on a value that has been successfully +// initialized by a call to Factorize that has returned true. Calls to methods +// of an unsuccessful Cholesky factorization will panic. +type BandCholesky struct { + // The chol pointer must never be retained as a pointer outside the Cholesky + // struct, either by returning chol outside the struct or by setting it to + // a pointer coming from outside. The same prohibition applies to the data + // slice within chol. + chol *TriBandDense + cond float64 +} + +// Factorize calculates the Cholesky decomposition of the matrix A and returns +// whether the matrix is positive definite. If Factorize returns false, the +// factorization must not be used. +func (ch *BandCholesky) Factorize(a SymBanded) (ok bool) { + n, k := a.SymBand() + if ch.chol == nil { + ch.chol = NewTriBandDense(n, k, Upper, nil) + } else { + ch.chol.Reset() + ch.chol.ReuseAsTriBand(n, k, Upper) + } + copySymBandIntoTriBand(ch.chol, a) + cSym := blas64.SymmetricBand{ + Uplo: blas.Upper, + N: n, + K: k, + Data: ch.chol.RawTriBand().Data, + Stride: ch.chol.RawTriBand().Stride, + } + _, ok = lapack64.Pbtrf(cSym) + if !ok { + ch.Reset() + return false + } + work := getFloat64s(3*n, false) + iwork := getInts(n, false) + aNorm := lapack64.Lansb(CondNorm, cSym, work) + ch.cond = 1 / lapack64.Pbcon(cSym, aNorm, work, iwork) + putInts(iwork) + putFloat64s(work) + return true +} + +// SolveTo finds the matrix X that solves A * X = B where A is represented by +// the Cholesky decomposition. The result is stored in-place into dst. +// If the Cholesky decomposition is singular or near-singular a Condition error +// is returned. See the documentation for Condition for more information. +func (ch *BandCholesky) SolveTo(dst *Dense, b Matrix) error { + if !ch.valid() { + panic(badCholesky) + } + br, bc := b.Dims() + if br != ch.chol.mat.N { + panic(ErrShape) + } + dst.reuseAsNonZeroed(br, bc) + if b != dst { + dst.Copy(b) + } + lapack64.Pbtrs(ch.chol.mat, dst.mat) + if ch.cond > ConditionTolerance { + return Condition(ch.cond) + } + return nil +} + +// SolveVecTo finds the vector x that solves A * x = b where A is represented by +// the Cholesky decomposition. The result is stored in-place into dst. +// If the Cholesky decomposition is singular or near-singular a Condition error +// is returned. See the documentation for Condition for more information. +func (ch *BandCholesky) SolveVecTo(dst *VecDense, b Vector) error { + if !ch.valid() { + panic(badCholesky) + } + n := ch.chol.mat.N + if br, bc := b.Dims(); br != n || bc != 1 { + panic(ErrShape) + } + if b, ok := b.(RawVectorer); ok && dst != b { + dst.checkOverlap(b.RawVector()) + } + dst.reuseAsNonZeroed(n) + if dst != b { + dst.CopyVec(b) + } + lapack64.Pbtrs(ch.chol.mat, dst.asGeneral()) + if ch.cond > ConditionTolerance { + return Condition(ch.cond) + } + return nil +} + +// Cond returns the condition number of the factorized matrix. +func (ch *BandCholesky) Cond() float64 { + if !ch.valid() { + panic(badCholesky) + } + return ch.cond +} + +// Reset resets the factorization so that it can be reused as the receiver of +// a dimensionally restricted operation. +func (ch *BandCholesky) Reset() { + if ch.chol != nil { + ch.chol.Reset() + } + ch.cond = math.Inf(1) +} + +// Dims returns the dimensions of the matrix. 
+func (ch *BandCholesky) Dims() (r, c int) {
+	n := ch.SymmetricDim()
+	return n, n
+}
+
+// At returns the element at row i, column j.
+func (ch *BandCholesky) At(i, j int) float64 {
+	n, k := ch.SymBand()
+	if uint(i) >= uint(n) {
+		panic(ErrRowAccess)
+	}
+	if uint(j) >= uint(n) {
+		panic(ErrColAccess)
+	}
+
+	if i > j {
+		i, j = j, i
+	}
+	if j-i > k {
+		return 0
+	}
+	var aij float64
+	// Use a distinct loop variable so that the bandwidth k is not shadowed.
+	for l := max(0, j-k); l <= i; l++ {
+		aij += ch.chol.at(l, i) * ch.chol.at(l, j)
+	}
+	return aij
+}
+
+// T returns the receiver, the transpose of a symmetric matrix.
+func (ch *BandCholesky) T() Matrix {
+	return ch
+}
+
+// TBand returns the receiver, the transpose of a symmetric band matrix.
+func (ch *BandCholesky) TBand() Banded {
+	return ch
+}
+
+// SymmetricDim implements the Symmetric interface and returns the number of rows
+// in the matrix (this is also the number of columns).
+func (ch *BandCholesky) SymmetricDim() int {
+	if ch.chol == nil {
+		return 0
+	}
+	n, _ := ch.chol.Triangle()
+	return n
+}
+
+// Bandwidth returns the lower and upper bandwidth values for the matrix.
+// The total bandwidth of the matrix is kl+ku+1.
+func (ch *BandCholesky) Bandwidth() (kl, ku int) {
+	_, k, _ := ch.chol.TriBand()
+	return k, k
+}
+
+// SymBand returns the number of rows/columns in the matrix, and the size of the
+// bandwidth. The total bandwidth of the matrix is 2*k+1.
+func (ch *BandCholesky) SymBand() (n, k int) {
+	n, k, _ = ch.chol.TriBand()
+	return n, k
+}
+
+// IsEmpty returns whether the receiver is empty. Empty matrices can be the
+// receiver for dimensionally restricted operations. The receiver can be emptied
+// using Reset.
+func (ch *BandCholesky) IsEmpty() bool {
+	return ch == nil || ch.chol.IsEmpty()
+}
+
+// Det returns the determinant of the matrix that has been factorized.
+func (ch *BandCholesky) Det() float64 {
+	if !ch.valid() {
+		panic(badCholesky)
+	}
+	return math.Exp(ch.LogDet())
+}
+
+// LogDet returns the log of the determinant of the matrix that has been factorized.
+func (ch *BandCholesky) LogDet() float64 {
+	if !ch.valid() {
+		panic(badCholesky)
+	}
+	var det float64
+	for i := 0; i < ch.chol.mat.N; i++ {
+		det += 2 * math.Log(ch.chol.mat.Data[i*ch.chol.mat.Stride])
+	}
+	return det
+}
+
+func (ch *BandCholesky) valid() bool {
+	return ch.chol != nil && !ch.chol.IsEmpty()
+}
+
+// PivotedCholesky is a symmetric positive semi-definite matrix represented by
+// its Cholesky factorization with complete pivoting.
+//
+// The factorization has the form
+//
+//	Pᵀ * A * P = Uᵀ * U
+//
+// where U is an upper triangular matrix and P is a permutation matrix.
+//
+// Cholesky methods may only be called on a receiver that has been initialized
+// by a call to Factorize. SolveTo and SolveVecTo methods may only be called if
+// Factorize has returned true.
+//
+// If the matrix A is certainly positive definite, then the unpivoted Cholesky
+// could be more efficient, especially for smaller matrices.
+type PivotedCholesky struct {
+	chol          *TriDense // The factor U
+	piv, pivTrans []int     // The permutation matrices P and Pᵀ
+	rank          int       // The computed rank of A
+
+	ok   bool    // Indicates whether the factorization can be used for solving linear systems
+	cond float64 // The condition number when ok is true
+}
+
+// Factorize computes the Cholesky factorization of the symmetric positive
+// semi-definite matrix A and returns whether the matrix is positive definite.
+// If Factorize returns false, the SolveTo methods must not be used.
+//
+// tol is a tolerance used to determine the computed rank of A. If it is
+// negative, a default value will be used.
+func (c *PivotedCholesky) Factorize(a Symmetric, tol float64) (ok bool) {
+	n := a.SymmetricDim()
+	c.reset(n)
+	copySymIntoTriangle(c.chol, a)
+
+	work := getFloat64s(3*c.chol.mat.N, false)
+	defer putFloat64s(work)
+
+	sym := c.chol.asSymBlas()
+	aNorm := lapack64.Lansy(CondNorm, sym, work)
+	_, c.rank, c.ok = lapack64.Pstrf(sym, c.piv, tol, work)
+	if c.ok {
+		iwork := getInts(n, false)
+		defer putInts(iwork)
+		c.cond = 1 / lapack64.Pocon(sym, aNorm, work, iwork)
+	} else {
+		for i := c.rank; i < n; i++ {
+			zero(sym.Data[i*sym.Stride+i : i*sym.Stride+n])
+		}
+	}
+	for i, p := range c.piv {
+		c.pivTrans[p] = i
+	}
+
+	return c.ok
+}
+
+// reset prepares the receiver for factorization of matrices of size n.
+func (c *PivotedCholesky) reset(n int) {
+	if c.chol == nil {
+		c.chol = NewTriDense(n, Upper, nil)
+	} else {
+		c.chol.Reset()
+		c.chol.reuseAsNonZeroed(n, Upper)
+	}
+	c.piv = useInt(c.piv, n)
+	c.pivTrans = useInt(c.pivTrans, n)
+	c.rank = 0
+	c.ok = false
+	c.cond = math.Inf(1)
+}
+
+// Dims returns the dimensions of the matrix A.
+func (ch *PivotedCholesky) Dims() (r, c int) {
+	n := ch.SymmetricDim()
+	return n, n
+}
+
+// At returns the element of A at row i, column j.
+func (c *PivotedCholesky) At(i, j int) float64 {
+	n := c.SymmetricDim()
+	if uint(i) >= uint(n) {
+		panic(ErrRowAccess)
+	}
+	if uint(j) >= uint(n) {
+		panic(ErrColAccess)
+	}
+
+	i = c.pivTrans[i]
+	j = c.pivTrans[j]
+	minij := min(min(i+1, j+1), c.rank)
+	var val float64
+	for k := 0; k < minij; k++ {
+		val += c.chol.at(k, i) * c.chol.at(k, j)
+	}
+	return val
+}
+
+// T returns the receiver, the transpose of a symmetric matrix.
+func (c *PivotedCholesky) T() Matrix {
+	return c
+}
+
+// SymmetricDim implements the Symmetric interface and returns the number of
+// rows (or columns) in the matrix.
+func (c *PivotedCholesky) SymmetricDim() int {
+	if c.chol == nil {
+		return 0
+	}
+	n, _ := c.chol.Triangle()
+	return n
+}
+
+// Rank returns the computed rank of the matrix A.
+func (c *PivotedCholesky) Rank() int {
+	if c.chol == nil {
+		panic(badCholesky)
+	}
+	return c.rank
+}
+
+// Cond returns the condition number of the factorized matrix.
+func (c *PivotedCholesky) Cond() float64 {
+	if c.chol == nil {
+		panic(badCholesky)
+	}
+	return c.cond
+}
+
+// RawU returns the Triangular matrix used to store the Cholesky factorization
+// of the original matrix A. If the returned matrix is modified, the
+// factorization is invalid and should not be used.
+//
+// If Factorize returned false, the rows of U from Rank to n will contain zeros
+// and so U will be upper trapezoidal.
+//
+// If Factorize has not been called, RawU will return nil.
+func (c *PivotedCholesky) RawU() Triangular {
+	if c.chol == nil {
+		return nil
+	}
+	return c.chol
+}
+
+// UTo stores the n×n upper triangular matrix U from the Cholesky factorization
+//
+//	Pᵀ * A * P = Uᵀ * U.
+//
+// into dst. If dst is empty, it is resized to be an n×n upper triangular
+// matrix. When dst is non-empty, UTo panics if dst is not n×n or not Upper.
+//
+// If Factorize returned false, the rows of U from Rank to n will contain zeros
+// and so U will be upper trapezoidal.
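+//
+// A minimal usage sketch (illustrative only; pc is assumed to have been
+// factorized successfully):
+//
+//	var u TriDense
+//	pc.UTo(&u) // u now holds the factor U.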
+func (c *PivotedCholesky) UTo(dst *TriDense) { + if c.chol == nil { + panic(badCholesky) + } + n := c.chol.mat.N + if dst.IsEmpty() { + dst.ReuseAsTri(n, Upper) + } else { + n2, kind := dst.Triangle() + if n != n2 { + panic(ErrShape) + } + if kind != Upper { + panic(ErrTriangle) + } + } + dst.Copy(c.chol) +} + +// ColumnPivots returns the column permutation p that represents the permutation +// matrix P from the Cholesky factorization +// +// Pᵀ * A * P = Uᵀ * U +// +// such that the nonzero entries are P[p[k],k] = 1. +func (c *PivotedCholesky) ColumnPivots(dst []int) []int { + if c.chol == nil { + panic(badCholesky) + } + n := c.chol.mat.N + if dst == nil { + dst = make([]int, n) + } + if len(dst) != n { + panic(badSliceLength) + } + copy(dst, c.piv) + return dst +} + +// SolveTo finds the matrix X that solves A * X = B where A is represented by +// the Cholesky decomposition. The result is stored in-place into dst. If the +// Cholesky decomposition is singular or near-singular, a Condition error is +// returned. See the documentation for Condition for more information. +// +// If Factorize returned false, SolveTo will panic. +func (c *PivotedCholesky) SolveTo(dst *Dense, b Matrix) error { + if !c.ok { + panic(badCholesky) + } + n := c.chol.mat.N + bm, bn := b.Dims() + if n != bm { + panic(ErrShape) + } + + dst.reuseAsNonZeroed(bm, bn) + if dst != b { + dst.Copy(b) + } + + // Permute rows of B: D = Pᵀ * B. + lapack64.Lapmr(true, dst.mat, c.piv) + // Solve Uᵀ * U * Y = D. + lapack64.Potrs(c.chol.mat, dst.mat) + // Permute rows of Y to recover the solution: X = P * Y. + lapack64.Lapmr(false, dst.mat, c.piv) + + if c.cond > ConditionTolerance { + return Condition(c.cond) + } + return nil +} + +// SolveVecTo finds the vector x that solves A * x = b where A is represented by +// the Cholesky decomposition. The result is stored in-place into dst. If the +// Cholesky decomposition is singular or near-singular, a Condition error is +// returned. See the documentation for Condition for more information. +// +// If Factorize returned false, SolveVecTo will panic. +func (c *PivotedCholesky) SolveVecTo(dst *VecDense, b Vector) error { + if !c.ok { + panic(badCholesky) + } + n := c.chol.mat.N + if br, bc := b.Dims(); br != n || bc != 1 { + panic(ErrShape) + } + if b, ok := b.(RawVectorer); ok && dst != b { + dst.checkOverlap(b.RawVector()) + } + + dst.reuseAsNonZeroed(n) + if dst != b { + dst.CopyVec(b) + } + + // Permute rows of B: D = Pᵀ * B. + lapack64.Lapmr(true, dst.asGeneral(), c.piv) + // Solve Uᵀ * U * Y = D. + lapack64.Potrs(c.chol.mat, dst.asGeneral()) + // Permute rows of Y to recover the solution: X = P * Y. + lapack64.Lapmr(false, dst.asGeneral(), c.piv) + + if c.cond > ConditionTolerance { + return Condition(c.cond) + } + return nil +} diff --git a/vendor/gonum.org/v1/gonum/mat/cmatrix.go b/vendor/gonum.org/v1/gonum/mat/cmatrix.go new file mode 100644 index 0000000000..336645751d --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/cmatrix.go @@ -0,0 +1,314 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math" + "math/cmplx" + + "gonum.org/v1/gonum/blas/cblas128" + "gonum.org/v1/gonum/floats/scalar" +) + +// CMatrix is the basic matrix interface type for complex matrices. +type CMatrix interface { + // Dims returns the dimensions of a CMatrix. + Dims() (r, c int) + + // At returns the value of a matrix element at row i, column j. 
+	// It will panic if i or j are out of bounds for the matrix.
+	At(i, j int) complex128
+
+	// H returns the conjugate transpose of the CMatrix. Whether H
+	// returns a copy of the underlying data is implementation dependent.
+	// This method may be implemented using the ConjTranspose type, which
+	// provides an implicit matrix conjugate transpose.
+	H() CMatrix
+
+	// T returns the transpose of the CMatrix. Whether T returns a copy of the
+	// underlying data is implementation dependent.
+	// This method may be implemented using the CTranspose type, which
+	// provides an implicit matrix transpose.
+	T() CMatrix
+}
+
+// A RawCMatrixer can return a cblas128.General representation of the receiver. Changes to the cblas128.General.Data
+// slice will be reflected in the original matrix, changes to the Rows, Cols and Stride fields will not.
+type RawCMatrixer interface {
+	RawCMatrix() cblas128.General
+}
+
+var (
+	_ CMatrix          = ConjTranspose{}
+	_ UnConjTransposer = ConjTranspose{}
+)
+
+// ConjTranspose is a type for performing an implicit matrix conjugate transpose.
+// It implements the CMatrix interface, returning values from the conjugate
+// transpose of the matrix within.
+type ConjTranspose struct {
+	CMatrix CMatrix
+}
+
+// At returns the value of the element at row i and column j of the conjugate
+// transposed matrix, that is, row j and column i of the CMatrix field.
+func (t ConjTranspose) At(i, j int) complex128 {
+	z := t.CMatrix.At(j, i)
+	return cmplx.Conj(z)
+}
+
+// Dims returns the dimensions of the transposed matrix. The number of rows returned
+// is the number of columns in the CMatrix field, and the number of columns is
+// the number of rows in the CMatrix field.
+func (t ConjTranspose) Dims() (r, c int) {
+	c, r = t.CMatrix.Dims()
+	return r, c
+}
+
+// H performs an implicit conjugate transpose by returning the CMatrix field.
+func (t ConjTranspose) H() CMatrix {
+	return t.CMatrix
+}
+
+// T performs an implicit transpose by returning the receiver inside a
+// CTranspose.
+func (t ConjTranspose) T() CMatrix {
+	return CTranspose{t}
+}
+
+// UnConjTranspose returns the CMatrix field.
+func (t ConjTranspose) UnConjTranspose() CMatrix {
+	return t.CMatrix
+}
+
+// CTranspose is a type for performing an implicit matrix transpose.
+// It implements the CMatrix interface, returning values from the
+// transpose of the matrix within.
+type CTranspose struct {
+	CMatrix CMatrix
+}
+
+// At returns the value of the element at row i and column j of the
+// transposed matrix, that is, row j and column i of the CMatrix field.
+func (t CTranspose) At(i, j int) complex128 {
+	return t.CMatrix.At(j, i)
+}
+
+// Dims returns the dimensions of the transposed matrix. The number of rows returned
+// is the number of columns in the CMatrix field, and the number of columns is
+// the number of rows in the CMatrix field.
+func (t CTranspose) Dims() (r, c int) {
+	c, r = t.CMatrix.Dims()
+	return r, c
+}
+
+// H performs an implicit conjugate transpose by returning the receiver inside a
+// ConjTranspose.
+func (t CTranspose) H() CMatrix {
+	return ConjTranspose{t}
+}
+
+// T performs an implicit transpose by returning the CMatrix field.
+func (t CTranspose) T() CMatrix {
+	return t.CMatrix
+}
+
+// Untranspose returns the CMatrix field.
+func (t CTranspose) Untranspose() CMatrix {
+	return t.CMatrix
+}
+
+// UnConjTransposer is a type that can undo an implicit conjugate transpose.
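+//
+// A typical use is recovering the underlying matrix from a conjugate
+// transpose (illustrative only; m is any CMatrix value):
+//
+//	if uc, ok := m.(UnConjTransposer); ok {
+//		m = uc.UnConjTranspose()
+//	}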
+type UnConjTransposer interface {
+	// UnConjTranspose returns the underlying CMatrix stored for the implicit
+	// conjugate transpose.
+	UnConjTranspose() CMatrix
+
+	// Note: This interface is needed to unify all of the Conjugate types. In
+	// the cmat128 methods, we need to test if the CMatrix has been implicitly
+	// transposed. If this is checked by testing for the specific Conjugate type
+	// then the behavior will be different if the user uses H() or HTri() for a
+	// triangular matrix.
+}
+
+// CUntransposer is a type that can undo an implicit transpose.
+type CUntransposer interface {
+	// Untranspose returns the underlying CMatrix stored for the implicit
+	// transpose.
+	Untranspose() CMatrix
+
+	// Note: This interface is needed to unify all of the CTranspose types. In
+	// the cmat128 methods, we need to test if the CMatrix has been implicitly
+	// transposed. If this is checked by testing for the specific CTranspose type
+	// then the behavior will be different if the user uses T() or TTri() for a
+	// triangular matrix.
+}
+
+// useC returns a complex128 slice with l elements, using c if it
+// has the necessary capacity, otherwise creating a new slice.
+func useC(c []complex128, l int) []complex128 {
+	if l <= cap(c) {
+		return c[:l]
+	}
+	return make([]complex128, l)
+}
+
+// useZeroedC returns a complex128 slice with l elements, using c if it
+// has the necessary capacity, otherwise creating a new slice. The
+// elements of the returned slice are guaranteed to be zero.
+func useZeroedC(c []complex128, l int) []complex128 {
+	if l <= cap(c) {
+		c = c[:l]
+		zeroC(c)
+		return c
+	}
+	return make([]complex128, l)
+}
+
+// zeroC zeros the given slice's elements.
+func zeroC(c []complex128) {
+	for i := range c {
+		c[i] = 0
+	}
+}
+
+// untransposeCmplx untransposes a matrix if applicable. If a is a CUntransposer
+// or an UnConjTransposer, then untransposeCmplx returns the underlying matrix and true for
+// the kind of transpose (potentially both).
+// If it is not, then it returns the input matrix and false for trans and conj.
+func untransposeCmplx(a CMatrix) (u CMatrix, trans, conj bool) {
+	switch ut := a.(type) {
+	case CUntransposer:
+		trans = true
+		u := ut.Untranspose()
+		if uc, ok := u.(UnConjTransposer); ok {
+			return uc.UnConjTranspose(), trans, true
+		}
+		return u, trans, false
+	case UnConjTransposer:
+		conj = true
+		u := ut.UnConjTranspose()
+		if ut, ok := u.(CUntransposer); ok {
+			return ut.Untranspose(), true, conj
+		}
+		return u, false, conj
+	default:
+		return a, false, false
+	}
+}
+
+// untransposeExtractCmplx returns an untransposed matrix in a built-in matrix type.
+//
+// The untransposed matrix is returned unaltered if it is a built-in matrix type.
+// Otherwise, if it implements a Raw method, an appropriate built-in type value
+// is returned holding the raw matrix value of the input. If neither of these
+// is possible, the untransposed matrix is returned.
+func untransposeExtractCmplx(a CMatrix) (u CMatrix, trans, conj bool) {
+	ut, trans, conj := untransposeCmplx(a)
+	switch m := ut.(type) {
+	case *CDense:
+		return m, trans, conj
+	case RawCMatrixer:
+		var d CDense
+		d.SetRawCMatrix(m.RawCMatrix())
+		return &d, trans, conj
+	default:
+		return ut, trans, conj
+	}
+}
+
+// CEqual returns whether the matrices a and b have the same size
+// and are element-wise equal.
+func CEqual(a, b CMatrix) bool {
+	ar, ac := a.Dims()
+	br, bc := b.Dims()
+	if ar != br || ac != bc {
+		return false
+	}
+	// TODO(btracey): Add in fast-paths. 
+ for i := 0; i < ar; i++ { + for j := 0; j < ac; j++ { + if a.At(i, j) != b.At(i, j) { + return false + } + } + } + return true +} + +// CEqualApprox returns whether the matrices a and b have the same size and contain all equal +// elements with tolerance for element-wise equality specified by epsilon. Matrices +// with non-equal shapes are not equal. +func CEqualApprox(a, b CMatrix, epsilon float64) bool { + // TODO(btracey): + ar, ac := a.Dims() + br, bc := b.Dims() + if ar != br || ac != bc { + return false + } + for i := 0; i < ar; i++ { + for j := 0; j < ac; j++ { + if !cEqualWithinAbsOrRel(a.At(i, j), b.At(i, j), epsilon, epsilon) { + return false + } + } + } + return true +} + +// TODO(btracey): Move these into a cmplxs if/when we have one. + +func cEqualWithinAbsOrRel(a, b complex128, absTol, relTol float64) bool { + if cEqualWithinAbs(a, b, absTol) { + return true + } + return cEqualWithinRel(a, b, relTol) +} + +// cEqualWithinAbs returns true if a and b have an absolute +// difference of less than tol. +func cEqualWithinAbs(a, b complex128, tol float64) bool { + return a == b || cmplx.Abs(a-b) <= tol +} + +const minNormalFloat64 = 2.2250738585072014e-308 + +// cEqualWithinRel returns true if the difference between a and b +// is not greater than tol times the greater value. +func cEqualWithinRel(a, b complex128, tol float64) bool { + if a == b { + return true + } + if cmplx.IsNaN(a) || cmplx.IsNaN(b) { + return false + } + // Cannot play the same trick as in floats/scalar because there are multiple + // possible infinities. + if cmplx.IsInf(a) { + if !cmplx.IsInf(b) { + return false + } + ra := real(a) + if math.IsInf(ra, 0) { + if ra == real(b) { + return scalar.EqualWithinRel(imag(a), imag(b), tol) + } + return false + } + if imag(a) == imag(b) { + return scalar.EqualWithinRel(ra, real(b), tol) + } + return false + } + if cmplx.IsInf(b) { + return false + } + + delta := cmplx.Abs(a - b) + if delta <= minNormalFloat64 { + return delta <= tol*minNormalFloat64 + } + return delta/math.Max(cmplx.Abs(a), cmplx.Abs(b)) <= tol +} diff --git a/vendor/gonum.org/v1/gonum/mat/consts.go b/vendor/gonum.org/v1/gonum/mat/consts.go new file mode 100644 index 0000000000..3de3f5bf47 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/consts.go @@ -0,0 +1,15 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +// TriKind represents the triangularity of the matrix. +type TriKind bool + +const ( + // Upper specifies an upper triangular matrix. + Upper TriKind = true + // Lower specifies a lower triangular matrix. + Lower TriKind = false +) diff --git a/vendor/gonum.org/v1/gonum/mat/dense.go b/vendor/gonum.org/v1/gonum/mat/dense.go new file mode 100644 index 0000000000..b08360cc70 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/dense.go @@ -0,0 +1,670 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package mat + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" + "gonum.org/v1/gonum/lapack/lapack64" +) + +var ( + dense *Dense + + _ Matrix = dense + _ allMatrix = dense + _ denseMatrix = dense + _ Mutable = dense + + _ ClonerFrom = dense + _ RowViewer = dense + _ ColViewer = dense + _ RawRowViewer = dense + _ Grower = dense + + _ RawMatrixSetter = dense + _ RawMatrixer = dense + + _ Reseter = dense +) + +// Dense is a dense matrix representation. +type Dense struct { + mat blas64.General + + capRows, capCols int +} + +// NewDense creates a new Dense matrix with r rows and c columns. If data == nil, +// a new slice is allocated for the backing slice. If len(data) == r*c, data is +// used as the backing slice, and changes to the elements of the returned Dense +// will be reflected in data. If neither of these is true, NewDense will panic. +// NewDense will panic if either r or c is zero. +// +// The data must be arranged in row-major order, i.e. the (i*c + j)-th +// element in the data slice is the {i, j}-th element in the matrix. +func NewDense(r, c int, data []float64) *Dense { + if r <= 0 || c <= 0 { + if r == 0 || c == 0 { + panic(ErrZeroLength) + } + panic(ErrNegativeDimension) + } + if data != nil && r*c != len(data) { + panic(ErrShape) + } + if data == nil { + data = make([]float64, r*c) + } + return &Dense{ + mat: blas64.General{ + Rows: r, + Cols: c, + Stride: c, + Data: data, + }, + capRows: r, + capCols: c, + } +} + +// ReuseAs changes the receiver if it IsEmpty() to be of size r×c. +// +// ReuseAs re-uses the backing data slice if it has sufficient capacity, +// otherwise a new slice is allocated. The backing data is zero on return. +// +// ReuseAs panics if the receiver is not empty, and panics if +// the input sizes are less than one. To empty the receiver for re-use, +// Reset should be used. +func (m *Dense) ReuseAs(r, c int) { + if r <= 0 || c <= 0 { + if r == 0 || c == 0 { + panic(ErrZeroLength) + } + panic(ErrNegativeDimension) + } + if !m.IsEmpty() { + panic(ErrReuseNonEmpty) + } + m.reuseAsZeroed(r, c) +} + +// reuseAsNonZeroed resizes an empty matrix to a r×c matrix, +// or checks that a non-empty matrix is r×c. It does not zero +// the data in the receiver. +func (m *Dense) reuseAsNonZeroed(r, c int) { + // reuseAs must be kept in sync with reuseAsZeroed. + if m.mat.Rows > m.capRows || m.mat.Cols > m.capCols { + // Panic as a string, not a mat.Error. + panic(badCap) + } + if r == 0 || c == 0 { + panic(ErrZeroLength) + } + if m.IsEmpty() { + m.mat = blas64.General{ + Rows: r, + Cols: c, + Stride: c, + Data: use(m.mat.Data, r*c), + } + m.capRows = r + m.capCols = c + return + } + if r != m.mat.Rows || c != m.mat.Cols { + panic(ErrShape) + } +} + +// reuseAsZeroed resizes an empty matrix to a r×c matrix, +// or checks that a non-empty matrix is r×c. It zeroes +// all the elements of the matrix. +func (m *Dense) reuseAsZeroed(r, c int) { + // reuseAsZeroed must be kept in sync with reuseAsNonZeroed. + if m.mat.Rows > m.capRows || m.mat.Cols > m.capCols { + // Panic as a string, not a mat.Error. + panic(badCap) + } + if r == 0 || c == 0 { + panic(ErrZeroLength) + } + if m.IsEmpty() { + m.mat = blas64.General{ + Rows: r, + Cols: c, + Stride: c, + Data: useZeroed(m.mat.Data, r*c), + } + m.capRows = r + m.capCols = c + return + } + if r != m.mat.Rows || c != m.mat.Cols { + panic(ErrShape) + } + m.Zero() +} + +// Zero sets all of the matrix elements to zero. 
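+//
+// For example:
+//
+//	m := NewDense(2, 2, []float64{1, 2, 3, 4})
+//	m.Zero() // m is now the 2×2 zero matrix.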
+func (m *Dense) Zero() { + r := m.mat.Rows + c := m.mat.Cols + for i := 0; i < r; i++ { + zero(m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c]) + } +} + +// isolatedWorkspace returns a new dense matrix w with the size of a and +// returns a callback to defer which performs cleanup at the return of the call. +// This should be used when a method receiver is the same pointer as an input argument. +func (m *Dense) isolatedWorkspace(a Matrix) (w *Dense, restore func()) { + r, c := a.Dims() + if r == 0 || c == 0 { + panic(ErrZeroLength) + } + w = getDenseWorkspace(r, c, false) + return w, func() { + m.Copy(w) + putDenseWorkspace(w) + } +} + +// Reset empties the matrix so that it can be reused as the +// receiver of a dimensionally restricted operation. +// +// Reset should not be used when the matrix shares backing data. +// See the Reseter interface for more information. +func (m *Dense) Reset() { + // Row, Cols and Stride must be zeroed in unison. + m.mat.Rows, m.mat.Cols, m.mat.Stride = 0, 0, 0 + m.capRows, m.capCols = 0, 0 + m.mat.Data = m.mat.Data[:0] +} + +// IsEmpty returns whether the receiver is empty. Empty matrices can be the +// receiver for size-restricted operations. The receiver can be emptied using +// Reset. +func (m *Dense) IsEmpty() bool { + // It must be the case that m.Dims() returns + // zeros in this case. See comment in Reset(). + return m.mat.Stride == 0 +} + +// asTriDense returns a TriDense with the given size and side. The backing data +// of the TriDense is the same as the receiver. +func (m *Dense) asTriDense(n int, diag blas.Diag, uplo blas.Uplo) *TriDense { + return &TriDense{ + mat: blas64.Triangular{ + N: n, + Stride: m.mat.Stride, + Data: m.mat.Data, + Uplo: uplo, + Diag: diag, + }, + cap: n, + } +} + +// DenseCopyOf returns a newly allocated copy of the elements of a. +func DenseCopyOf(a Matrix) *Dense { + d := &Dense{} + d.CloneFrom(a) + return d +} + +// SetRawMatrix sets the underlying blas64.General used by the receiver. +// Changes to elements in the receiver following the call will be reflected +// in b. +func (m *Dense) SetRawMatrix(b blas64.General) { + m.capRows, m.capCols = b.Rows, b.Cols + m.mat = b +} + +// RawMatrix returns the underlying blas64.General used by the receiver. +// Changes to elements in the receiver following the call will be reflected +// in returned blas64.General. +func (m *Dense) RawMatrix() blas64.General { return m.mat } + +// Dims returns the number of rows and columns in the matrix. +func (m *Dense) Dims() (r, c int) { return m.mat.Rows, m.mat.Cols } + +// Caps returns the number of rows and columns in the backing matrix. +func (m *Dense) Caps() (r, c int) { return m.capRows, m.capCols } + +// T performs an implicit transpose by returning the receiver inside a Transpose. +func (m *Dense) T() Matrix { + return Transpose{m} +} + +// ColView returns a Vector reflecting the column j, backed by the matrix data. +// +// See ColViewer for more information. +func (m *Dense) ColView(j int) Vector { + var v VecDense + v.ColViewOf(m, j) + return &v +} + +// SetCol sets the values in the specified column of the matrix to the values +// in src. len(src) must equal the number of rows in the receiver. 
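+//
+// A minimal usage sketch:
+//
+//	m := NewDense(2, 3, nil)
+//	m.SetCol(1, []float64{5, 6}) // Column 1 becomes (5, 6)ᵀ.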
+func (m *Dense) SetCol(j int, src []float64) {
+	if j >= m.mat.Cols || j < 0 {
+		panic(ErrColAccess)
+	}
+	if len(src) != m.mat.Rows {
+		panic(ErrColLength)
+	}
+
+	blas64.Copy(
+		blas64.Vector{N: m.mat.Rows, Inc: 1, Data: src},
+		blas64.Vector{N: m.mat.Rows, Inc: m.mat.Stride, Data: m.mat.Data[j:]},
+	)
+}
+
+// SetRow sets the values in the specified row of the matrix to the values
+// in src. len(src) must equal the number of columns in the receiver.
+func (m *Dense) SetRow(i int, src []float64) {
+	if i >= m.mat.Rows || i < 0 {
+		panic(ErrRowAccess)
+	}
+	if len(src) != m.mat.Cols {
+		panic(ErrRowLength)
+	}
+
+	copy(m.rawRowView(i), src)
+}
+
+// RowView returns row i of the matrix data represented as a column vector,
+// backed by the matrix data.
+//
+// See RowViewer for more information.
+func (m *Dense) RowView(i int) Vector {
+	var v VecDense
+	v.RowViewOf(m, i)
+	return &v
+}
+
+// RawRowView returns a slice backed by the same array as backing the
+// receiver.
+func (m *Dense) RawRowView(i int) []float64 {
+	if i >= m.mat.Rows || i < 0 {
+		panic(ErrRowAccess)
+	}
+	return m.rawRowView(i)
+}
+
+func (m *Dense) rawRowView(i int) []float64 {
+	return m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+m.mat.Cols]
+}
+
+// DiagView returns the diagonal as a matrix backed by the original data.
+func (m *Dense) DiagView() Diagonal {
+	n := min(m.mat.Rows, m.mat.Cols)
+	return &DiagDense{
+		mat: blas64.Vector{
+			N:    n,
+			Inc:  m.mat.Stride + 1,
+			Data: m.mat.Data[:(n-1)*m.mat.Stride+n],
+		},
+	}
+}
+
+// Slice returns a new Matrix that shares backing data with the receiver.
+// The returned matrix starts at {i,j} of the receiver and extends k-i rows
+// and l-j columns. The final row in the resulting matrix is k-1 and the
+// final column is l-1.
+// Slice panics with ErrIndexOutOfRange if the slice is outside the capacity
+// of the receiver.
+func (m *Dense) Slice(i, k, j, l int) Matrix {
+	return m.slice(i, k, j, l)
+}
+
+func (m *Dense) slice(i, k, j, l int) *Dense {
+	mr, mc := m.Caps()
+	if i < 0 || mr <= i || j < 0 || mc <= j || k < i || mr < k || l < j || mc < l {
+		if i == k || j == l {
+			panic(ErrZeroLength)
+		}
+		panic(ErrIndexOutOfRange)
+	}
+	t := *m
+	t.mat.Data = t.mat.Data[i*t.mat.Stride+j : (k-1)*t.mat.Stride+l]
+	t.mat.Rows = k - i
+	t.mat.Cols = l - j
+	t.capRows -= i
+	t.capCols -= j
+	return &t
+}
+
+// Grow returns the receiver expanded by r rows and c columns. If the dimensions
+// of the expanded matrix are outside the capacities of the receiver a new
+// allocation is made, otherwise not. Note the receiver itself is not modified
+// during the call to Grow.
+func (m *Dense) Grow(r, c int) Matrix {
+	if r < 0 || c < 0 {
+		panic(ErrIndexOutOfRange)
+	}
+	if r == 0 && c == 0 {
+		return m
+	}
+
+	r += m.mat.Rows
+	c += m.mat.Cols
+
+	var t Dense
+	switch {
+	case m.mat.Rows == 0 || m.mat.Cols == 0:
+		t.mat = blas64.General{
+			Rows:   r,
+			Cols:   c,
+			Stride: c,
+			// We zero because we don't know how the matrix will be used.
+			// In other places, the mat is immediately filled with a result;
+			// this is not the case here.
+			Data: useZeroed(m.mat.Data, r*c),
+		}
+	case r > m.capRows || c > m.capCols:
+		cr := max(r, m.capRows)
+		cc := max(c, m.capCols)
+		t.mat = blas64.General{
+			Rows:   r,
+			Cols:   c,
+			Stride: cc,
+			Data:   make([]float64, cr*cc),
+		}
+		t.capRows = cr
+		t.capCols = cc
+		// Copy the complete matrix over to the new matrix.
+		// Including elements not currently visible. Use a temporary structure
+		// to avoid modifying the receiver.
+ var tmp Dense + tmp.mat = blas64.General{ + Rows: m.mat.Rows, + Cols: m.mat.Cols, + Stride: m.mat.Stride, + Data: m.mat.Data, + } + tmp.capRows = m.capRows + tmp.capCols = m.capCols + t.Copy(&tmp) + return &t + default: + t.mat = blas64.General{ + Data: m.mat.Data[:(r-1)*m.mat.Stride+c], + Rows: r, + Cols: c, + Stride: m.mat.Stride, + } + } + t.capRows = r + t.capCols = c + return &t +} + +// CloneFrom makes a copy of a into the receiver, overwriting the previous value of +// the receiver. The clone from operation does not make any restriction on shape and +// will not cause shadowing. +// +// See the ClonerFrom interface for more information. +func (m *Dense) CloneFrom(a Matrix) { + r, c := a.Dims() + mat := blas64.General{ + Rows: r, + Cols: c, + Stride: c, + } + m.capRows, m.capCols = r, c + + aU, trans := untransposeExtract(a) + switch aU := aU.(type) { + case *Dense: + amat := aU.mat + mat.Data = make([]float64, r*c) + if trans { + for i := 0; i < r; i++ { + blas64.Copy(blas64.Vector{N: c, Inc: amat.Stride, Data: amat.Data[i : i+(c-1)*amat.Stride+1]}, + blas64.Vector{N: c, Inc: 1, Data: mat.Data[i*c : (i+1)*c]}) + } + } else { + for i := 0; i < r; i++ { + copy(mat.Data[i*c:(i+1)*c], amat.Data[i*amat.Stride:i*amat.Stride+c]) + } + } + case *VecDense: + amat := aU.mat + mat.Data = make([]float64, aU.mat.N) + blas64.Copy(blas64.Vector{N: aU.mat.N, Inc: amat.Inc, Data: amat.Data}, + blas64.Vector{N: aU.mat.N, Inc: 1, Data: mat.Data}) + default: + mat.Data = make([]float64, r*c) + w := *m + w.mat = mat + for i := 0; i < r; i++ { + for j := 0; j < c; j++ { + w.set(i, j, a.At(i, j)) + } + } + *m = w + return + } + m.mat = mat +} + +// Copy makes a copy of elements of a into the receiver. It is similar to the +// built-in copy; it copies as much as the overlap between the two matrices and +// returns the number of rows and columns it copied. If a aliases the receiver +// and is a transposed Dense or VecDense, with a non-unitary increment, Copy will +// panic. +// +// See the Copier interface for more information. +func (m *Dense) Copy(a Matrix) (r, c int) { + r, c = a.Dims() + if a == m { + return r, c + } + r = min(r, m.mat.Rows) + c = min(c, m.mat.Cols) + if r == 0 || c == 0 { + return 0, 0 + } + + aU, trans := untransposeExtract(a) + switch aU := aU.(type) { + case *Dense: + amat := aU.mat + if trans { + if amat.Stride != 1 { + m.checkOverlap(amat) + } + for i := 0; i < r; i++ { + blas64.Copy(blas64.Vector{N: c, Inc: amat.Stride, Data: amat.Data[i : i+(c-1)*amat.Stride+1]}, + blas64.Vector{N: c, Inc: 1, Data: m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c]}) + } + } else { + switch o := offset(m.mat.Data, amat.Data); { + case o < 0: + for i := r - 1; i >= 0; i-- { + copy(m.mat.Data[i*m.mat.Stride:i*m.mat.Stride+c], amat.Data[i*amat.Stride:i*amat.Stride+c]) + } + case o > 0: + for i := 0; i < r; i++ { + copy(m.mat.Data[i*m.mat.Stride:i*m.mat.Stride+c], amat.Data[i*amat.Stride:i*amat.Stride+c]) + } + default: + // Nothing to do. 
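+				// An offset of zero means m.mat.Data and amat.Data
+				// share the same leading element, so the two matrices
+				// are taken to be identical views and no copy is needed.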
+			}
+		}
+	case *VecDense:
+		var n, stride int
+		amat := aU.mat
+		if trans {
+			if amat.Inc != 1 {
+				m.checkOverlap(aU.asGeneral())
+			}
+			n = c
+			stride = 1
+		} else {
+			n = r
+			stride = m.mat.Stride
+		}
+		if amat.Inc == 1 && stride == 1 {
+			copy(m.mat.Data, amat.Data[:n])
+			break
+		}
+		switch o := offset(m.mat.Data, amat.Data); {
+		case o < 0:
+			blas64.Copy(blas64.Vector{N: n, Inc: -amat.Inc, Data: amat.Data},
+				blas64.Vector{N: n, Inc: -stride, Data: m.mat.Data})
+		case o > 0:
+			blas64.Copy(blas64.Vector{N: n, Inc: amat.Inc, Data: amat.Data},
+				blas64.Vector{N: n, Inc: stride, Data: m.mat.Data})
+		default:
+			// Nothing to do.
+		}
+	default:
+		m.checkOverlapMatrix(aU)
+		for i := 0; i < r; i++ {
+			for j := 0; j < c; j++ {
+				m.set(i, j, a.At(i, j))
+			}
+		}
+	}
+
+	return r, c
+}
+
+// Stack appends the rows of b onto the rows of a, placing the result into the
+// receiver with b placed in the greater indexed rows. Stack will panic if the
+// two input matrices do not have the same number of columns or the constructed
+// stacked matrix is not the same shape as the receiver.
+func (m *Dense) Stack(a, b Matrix) {
+	ar, ac := a.Dims()
+	br, bc := b.Dims()
+	if ac != bc || m == a || m == b {
+		panic(ErrShape)
+	}
+
+	m.reuseAsNonZeroed(ar+br, ac)
+
+	m.Copy(a)
+	w := m.slice(ar, ar+br, 0, bc)
+	w.Copy(b)
+}
+
+// Augment creates the augmented matrix of a and b, where b is placed in the
+// greater indexed columns. Augment will panic if the two input matrices do
+// not have the same number of rows or the constructed augmented matrix is
+// not the same shape as the receiver.
+func (m *Dense) Augment(a, b Matrix) {
+	ar, ac := a.Dims()
+	br, bc := b.Dims()
+	if ar != br || m == a || m == b {
+		panic(ErrShape)
+	}
+
+	m.reuseAsNonZeroed(ar, ac+bc)
+
+	m.Copy(a)
+	w := m.slice(0, br, ac, ac+bc)
+	w.Copy(b)
+}
+
+// Trace returns the trace of the matrix.
+//
+// Trace will panic with ErrSquare if the matrix is not square and with
+// ErrZeroLength if the matrix has zero size.
+func (m *Dense) Trace() float64 {
+	r, c := m.Dims()
+	if r != c {
+		panic(ErrSquare)
+	}
+	if m.IsEmpty() {
+		panic(ErrZeroLength)
+	}
+	// TODO(btracey): could use internal asm sum routine.
+	var v float64
+	for i := 0; i < m.mat.Rows; i++ {
+		v += m.mat.Data[i*m.mat.Stride+i]
+	}
+	return v
+}
+
+// Norm returns the specified norm of the receiver. Valid norms are:
+//
+//	1 - The maximum absolute column sum
+//	2 - The Frobenius norm, the square root of the sum of the squares of the elements
+//	Inf - The maximum absolute row sum
+//
+// Norm will panic with ErrNormOrder if an illegal norm is specified and with
+// ErrZeroLength if the matrix has zero size.
+func (m *Dense) Norm(norm float64) float64 {
+	if m.IsEmpty() {
+		panic(ErrZeroLength)
+	}
+	lnorm := normLapack(norm, false)
+	if lnorm == lapack.MaxColumnSum {
+		work := getFloat64s(m.mat.Cols, false)
+		defer putFloat64s(work)
+		return lapack64.Lange(lnorm, m.mat, work)
+	}
+	return lapack64.Lange(lnorm, m.mat, nil)
+}
+
+// Permutation constructs an n×n permutation matrix P from the given
+// row permutation such that the nonzero entries are P[i,p[i]] = 1.
+func (m *Dense) Permutation(n int, p []int) {
+	if len(p) != n {
+		panic(badSliceLength)
+	}
+	m.reuseAsZeroed(n, n)
+	for i, v := range p {
+		if v < 0 || v >= n {
+			panic(ErrRowAccess)
+		}
+		m.mat.Data[i*m.mat.Stride+v] = 1
+	}
+}
+
+// PermuteRows rearranges the rows of the m×n matrix A in the receiver as
+// specified by the permutation p[0],p[1],...,p[m-1] of the integers 0,...,m-1. 
+//
+// If inverse is false, the given permutation is applied:
+//
+//	A[p[i],0:n] is moved to A[i,0:n] for i=0,1,...,m-1.
+//
+// If inverse is true, the inverse permutation is applied:
+//
+//	A[i,0:n] is moved to A[p[i],0:n] for i=0,1,...,m-1.
+//
+// p must have length m, otherwise PermuteRows will panic.
+func (m *Dense) PermuteRows(p []int, inverse bool) {
+	r, _ := m.Dims()
+	if len(p) != r {
+		panic(badSliceLength)
+	}
+	lapack64.Lapmr(!inverse, m.mat, p)
+}
+
+// PermuteCols rearranges the columns of the m×n matrix A in the receiver as
+// specified by the permutation p[0],p[1],...,p[n-1] of the integers 0,...,n-1.
+//
+// If inverse is false, the given permutation is applied:
+//
+//	A[0:m,p[j]] is moved to A[0:m,j] for j = 0, 1, ..., n-1.
+//
+// If inverse is true, the inverse permutation is applied:
+//
+//	A[0:m,j] is moved to A[0:m,p[j]] for j = 0, 1, ..., n-1.
+//
+// p must have length n, otherwise PermuteCols will panic.
+func (m *Dense) PermuteCols(p []int, inverse bool) {
+	_, c := m.Dims()
+	if len(p) != c {
+		panic(badSliceLength)
+	}
+	lapack64.Lapmt(!inverse, m.mat, p)
+}
diff --git a/vendor/gonum.org/v1/gonum/mat/dense_arithmetic.go b/vendor/gonum.org/v1/gonum/mat/dense_arithmetic.go
new file mode 100644
index 0000000000..259ee13d51
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mat/dense_arithmetic.go
@@ -0,0 +1,877 @@
+// Copyright ©2013 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mat
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack/lapack64"
+)
+
+// Add adds a and b element-wise, placing the result in the receiver. Add
+// will panic if the two matrices do not have the same shape.
+func (m *Dense) Add(a, b Matrix) {
+	ar, ac := a.Dims()
+	br, bc := b.Dims()
+	if ar != br || ac != bc {
+		panic(ErrShape)
+	}
+
+	aU, aTrans := untransposeExtract(a)
+	bU, bTrans := untransposeExtract(b)
+	m.reuseAsNonZeroed(ar, ac)
+
+	if arm, ok := a.(*Dense); ok {
+		if brm, ok := b.(*Dense); ok {
+			amat, bmat := arm.mat, brm.mat
+			if m != aU {
+				m.checkOverlap(amat)
+			}
+			if m != bU {
+				m.checkOverlap(bmat)
+			}
+			for ja, jb, jm := 0, 0, 0; ja < ar*amat.Stride; ja, jb, jm = ja+amat.Stride, jb+bmat.Stride, jm+m.mat.Stride {
+				for i, v := range amat.Data[ja : ja+ac] {
+					m.mat.Data[i+jm] = v + bmat.Data[i+jb]
+				}
+			}
+			return
+		}
+	}
+
+	m.checkOverlapMatrix(aU)
+	m.checkOverlapMatrix(bU)
+	var restore func()
+	if aTrans && m == aU {
+		m, restore = m.isolatedWorkspace(aU)
+		defer restore()
+	} else if bTrans && m == bU {
+		m, restore = m.isolatedWorkspace(bU)
+		defer restore()
+	}
+
+	for r := 0; r < ar; r++ {
+		for c := 0; c < ac; c++ {
+			m.set(r, c, a.At(r, c)+b.At(r, c))
+		}
+	}
+}
+
+// Sub subtracts the matrix b from a, placing the result in the receiver. Sub
+// will panic if the two matrices do not have the same shape.
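+//
+// A minimal usage sketch:
+//
+//	a := NewDense(2, 2, []float64{1, 2, 3, 4})
+//	b := NewDense(2, 2, []float64{4, 3, 2, 1})
+//	var c Dense
+//	c.Sub(a, b) // c is now [[-3 -1] [1 3]].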
+func (m *Dense) Sub(a, b Matrix) { + ar, ac := a.Dims() + br, bc := b.Dims() + if ar != br || ac != bc { + panic(ErrShape) + } + + aU, aTrans := untransposeExtract(a) + bU, bTrans := untransposeExtract(b) + m.reuseAsNonZeroed(ar, ac) + + if arm, ok := a.(*Dense); ok { + if brm, ok := b.(*Dense); ok { + amat, bmat := arm.mat, brm.mat + if m != aU { + m.checkOverlap(amat) + } + if m != bU { + m.checkOverlap(bmat) + } + for ja, jb, jm := 0, 0, 0; ja < ar*amat.Stride; ja, jb, jm = ja+amat.Stride, jb+bmat.Stride, jm+m.mat.Stride { + for i, v := range amat.Data[ja : ja+ac] { + m.mat.Data[i+jm] = v - bmat.Data[i+jb] + } + } + return + } + } + + m.checkOverlapMatrix(aU) + m.checkOverlapMatrix(bU) + var restore func() + if aTrans && m == aU { + m, restore = m.isolatedWorkspace(aU) + defer restore() + } else if bTrans && m == bU { + m, restore = m.isolatedWorkspace(bU) + defer restore() + } + + for r := 0; r < ar; r++ { + for c := 0; c < ac; c++ { + m.set(r, c, a.At(r, c)-b.At(r, c)) + } + } +} + +// MulElem performs element-wise multiplication of a and b, placing the result +// in the receiver. MulElem will panic if the two matrices do not have the same +// shape. +func (m *Dense) MulElem(a, b Matrix) { + ar, ac := a.Dims() + br, bc := b.Dims() + if ar != br || ac != bc { + panic(ErrShape) + } + + aU, aTrans := untransposeExtract(a) + bU, bTrans := untransposeExtract(b) + m.reuseAsNonZeroed(ar, ac) + + if arm, ok := a.(*Dense); ok { + if brm, ok := b.(*Dense); ok { + amat, bmat := arm.mat, brm.mat + if m != aU { + m.checkOverlap(amat) + } + if m != bU { + m.checkOverlap(bmat) + } + for ja, jb, jm := 0, 0, 0; ja < ar*amat.Stride; ja, jb, jm = ja+amat.Stride, jb+bmat.Stride, jm+m.mat.Stride { + for i, v := range amat.Data[ja : ja+ac] { + m.mat.Data[i+jm] = v * bmat.Data[i+jb] + } + } + return + } + } + + m.checkOverlapMatrix(aU) + m.checkOverlapMatrix(bU) + var restore func() + if aTrans && m == aU { + m, restore = m.isolatedWorkspace(aU) + defer restore() + } else if bTrans && m == bU { + m, restore = m.isolatedWorkspace(bU) + defer restore() + } + + for r := 0; r < ar; r++ { + for c := 0; c < ac; c++ { + m.set(r, c, a.At(r, c)*b.At(r, c)) + } + } +} + +// DivElem performs element-wise division of a by b, placing the result +// in the receiver. DivElem will panic if the two matrices do not have the same +// shape. +func (m *Dense) DivElem(a, b Matrix) { + ar, ac := a.Dims() + br, bc := b.Dims() + if ar != br || ac != bc { + panic(ErrShape) + } + + aU, aTrans := untransposeExtract(a) + bU, bTrans := untransposeExtract(b) + m.reuseAsNonZeroed(ar, ac) + + if arm, ok := a.(*Dense); ok { + if brm, ok := b.(*Dense); ok { + amat, bmat := arm.mat, brm.mat + if m != aU { + m.checkOverlap(amat) + } + if m != bU { + m.checkOverlap(bmat) + } + for ja, jb, jm := 0, 0, 0; ja < ar*amat.Stride; ja, jb, jm = ja+amat.Stride, jb+bmat.Stride, jm+m.mat.Stride { + for i, v := range amat.Data[ja : ja+ac] { + m.mat.Data[i+jm] = v / bmat.Data[i+jb] + } + } + return + } + } + + m.checkOverlapMatrix(aU) + m.checkOverlapMatrix(bU) + var restore func() + if aTrans && m == aU { + m, restore = m.isolatedWorkspace(aU) + defer restore() + } else if bTrans && m == bU { + m, restore = m.isolatedWorkspace(bU) + defer restore() + } + + for r := 0; r < ar; r++ { + for c := 0; c < ac; c++ { + m.set(r, c, a.At(r, c)/b.At(r, c)) + } + } +} + +// Inverse computes the inverse of the matrix a, storing the result into the +// receiver. If a is ill-conditioned, a Condition error will be returned. 
+// Note that matrix inversion is numerically unstable, and should generally +// be avoided where possible, for example by using the Solve routines. +func (m *Dense) Inverse(a Matrix) error { + // TODO(btracey): Special case for RawTriangular, etc. + r, c := a.Dims() + if r != c { + panic(ErrSquare) + } + m.reuseAsNonZeroed(a.Dims()) + aU, aTrans := untransposeExtract(a) + switch rm := aU.(type) { + case *Dense: + if m != aU || aTrans { + if m == aU || m.checkOverlap(rm.mat) { + tmp := getDenseWorkspace(r, c, false) + tmp.Copy(a) + m.Copy(tmp) + putDenseWorkspace(tmp) + break + } + m.Copy(a) + } + default: + m.Copy(a) + } + // Compute the norm of A. + work := getFloat64s(4*r, false) // Length must be at least 4*r for Gecon. + norm := lapack64.Lange(CondNorm, m.mat, work) + // Compute the LU factorization of A. + ipiv := getInts(r, false) + defer putInts(ipiv) + ok := lapack64.Getrf(m.mat, ipiv) + if !ok { + // A is exactly singular. + return Condition(math.Inf(1)) + } + // Compute the condition number of A using the LU factorization. + iwork := getInts(r, false) + defer putInts(iwork) + rcond := lapack64.Gecon(CondNorm, m.mat, norm, work, iwork) + // Compute A^{-1} from the LU factorization regardless of the value of rcond. + lapack64.Getri(m.mat, ipiv, work, -1) + if int(work[0]) > len(work) { + l := int(work[0]) + putFloat64s(work) + work = getFloat64s(l, false) + } + defer putFloat64s(work) + ok = lapack64.Getri(m.mat, ipiv, work, len(work)) + if !ok || rcond == 0 { + // A is exactly singular. + return Condition(math.Inf(1)) + } + // Check whether A is singular for computational purposes. + cond := 1 / rcond + if cond > ConditionTolerance { + return Condition(cond) + } + return nil +} + +// Mul takes the matrix product of a and b, placing the result in the receiver. +// If the number of columns in a does not equal the number of rows in b, Mul will panic. +func (m *Dense) Mul(a, b Matrix) { + ar, ac := a.Dims() + br, bc := b.Dims() + + if ac != br { + panic(ErrShape) + } + + aU, aTrans := untransposeExtract(a) + bU, bTrans := untransposeExtract(b) + m.reuseAsNonZeroed(ar, bc) + var restore func() + if m == aU { + m, restore = m.isolatedWorkspace(aU) + defer restore() + } else if m == bU { + m, restore = m.isolatedWorkspace(bU) + defer restore() + } + aT := blas.NoTrans + if aTrans { + aT = blas.Trans + } + bT := blas.NoTrans + if bTrans { + bT = blas.Trans + } + + // Some of the cases do not have a transpose option, so create + // temporary memory. + // C = Aᵀ * B = (Bᵀ * A)ᵀ + // Cᵀ = Bᵀ * A. + if aU, ok := aU.(*Dense); ok { + if restore == nil { + m.checkOverlap(aU.mat) + } + switch bU := bU.(type) { + case *Dense: + if restore == nil { + m.checkOverlap(bU.mat) + } + blas64.Gemm(aT, bT, 1, aU.mat, bU.mat, 0, m.mat) + return + + case *SymDense: + if aTrans { + c := getDenseWorkspace(ac, ar, false) + blas64.Symm(blas.Left, 1, bU.mat, aU.mat, 0, c.mat) + strictCopy(m, c.T()) + putDenseWorkspace(c) + return + } + blas64.Symm(blas.Right, 1, bU.mat, aU.mat, 0, m.mat) + return + + case *TriDense: + // Trmm updates in place, so copy aU first. 
+ if aTrans { + c := getDenseWorkspace(ac, ar, false) + var tmp Dense + tmp.SetRawMatrix(aU.mat) + c.Copy(&tmp) + bT := blas.Trans + if bTrans { + bT = blas.NoTrans + } + blas64.Trmm(blas.Left, bT, 1, bU.mat, c.mat) + strictCopy(m, c.T()) + putDenseWorkspace(c) + return + } + m.Copy(a) + blas64.Trmm(blas.Right, bT, 1, bU.mat, m.mat) + return + + case *VecDense: + m.checkOverlap(bU.asGeneral()) + bvec := bU.RawVector() + if bTrans { + // {ar,1} x {1,bc}, which is not a vector. + // Instead, construct B as a General. + bmat := blas64.General{ + Rows: bc, + Cols: 1, + Stride: bvec.Inc, + Data: bvec.Data, + } + blas64.Gemm(aT, bT, 1, aU.mat, bmat, 0, m.mat) + return + } + cvec := blas64.Vector{ + Inc: m.mat.Stride, + Data: m.mat.Data, + } + blas64.Gemv(aT, 1, aU.mat, bvec, 0, cvec) + return + } + } + if bU, ok := bU.(*Dense); ok { + if restore == nil { + m.checkOverlap(bU.mat) + } + switch aU := aU.(type) { + case *SymDense: + if bTrans { + c := getDenseWorkspace(bc, br, false) + blas64.Symm(blas.Right, 1, aU.mat, bU.mat, 0, c.mat) + strictCopy(m, c.T()) + putDenseWorkspace(c) + return + } + blas64.Symm(blas.Left, 1, aU.mat, bU.mat, 0, m.mat) + return + + case *TriDense: + // Trmm updates in place, so copy bU first. + if bTrans { + c := getDenseWorkspace(bc, br, false) + var tmp Dense + tmp.SetRawMatrix(bU.mat) + c.Copy(&tmp) + aT := blas.Trans + if aTrans { + aT = blas.NoTrans + } + blas64.Trmm(blas.Right, aT, 1, aU.mat, c.mat) + strictCopy(m, c.T()) + putDenseWorkspace(c) + return + } + m.Copy(b) + blas64.Trmm(blas.Left, aT, 1, aU.mat, m.mat) + return + + case *VecDense: + m.checkOverlap(aU.asGeneral()) + avec := aU.RawVector() + if aTrans { + // {1,ac} x {ac, bc} + // Transpose B so that the vector is on the right. + cvec := blas64.Vector{ + Inc: 1, + Data: m.mat.Data, + } + bT := blas.Trans + if bTrans { + bT = blas.NoTrans + } + blas64.Gemv(bT, 1, bU.mat, avec, 0, cvec) + return + } + // {ar,1} x {1,bc} which is not a vector result. + // Instead, construct A as a General. + amat := blas64.General{ + Rows: ar, + Cols: 1, + Stride: avec.Inc, + Data: avec.Data, + } + blas64.Gemm(aT, bT, 1, amat, bU.mat, 0, m.mat) + return + } + } + + m.checkOverlapMatrix(aU) + m.checkOverlapMatrix(bU) + row := getFloat64s(ac, false) + defer putFloat64s(row) + for r := 0; r < ar; r++ { + for i := range row { + row[i] = a.At(r, i) + } + for c := 0; c < bc; c++ { + var v float64 + for i, e := range row { + v += e * b.At(i, c) + } + m.mat.Data[r*m.mat.Stride+c] = v + } + } +} + +// strictCopy copies a into m panicking if the shape of a and m differ. +func strictCopy(m *Dense, a Matrix) { + r, c := m.Copy(a) + if r != m.mat.Rows || c != m.mat.Cols { + // Panic with a string since this + // is not a user-facing panic. + panic(ErrShape.Error()) + } +} + +// Exp calculates the exponential of the matrix a, e^a, placing the result +// in the receiver. Exp will panic with ErrShape if a is not square. +func (m *Dense) Exp(a Matrix) { + // The implementation used here is from Functions of Matrices: Theory and Computation + // Chapter 10, Algorithm 10.20. 
https://doi.org/10.1137/1.9780898717778.ch10
+
+	r, c := a.Dims()
+	if r != c {
+		panic(ErrShape)
+	}
+
+	m.reuseAsNonZeroed(r, r)
+	if r == 1 {
+		m.mat.Data[0] = math.Exp(a.At(0, 0))
+		return
+	}
+
+	pade := []struct {
+		theta float64
+		b     []float64
+	}{
+		{theta: 0.015, b: []float64{
+			120, 60, 12, 1,
+		}},
+		{theta: 0.25, b: []float64{
+			30240, 15120, 3360, 420, 30, 1,
+		}},
+		{theta: 0.95, b: []float64{
+			17297280, 8648640, 1995840, 277200, 25200, 1512, 56, 1,
+		}},
+		{theta: 2.1, b: []float64{
+			17643225600, 8821612800, 2075673600, 302702400, 30270240, 2162160, 110880, 3960, 90, 1,
+		}},
+	}
+
+	a1 := m
+	a1.Copy(a)
+	v := getDenseWorkspace(r, r, true)
+	vraw := v.RawMatrix()
+	n := r * r
+	vvec := blas64.Vector{N: n, Inc: 1, Data: vraw.Data}
+	defer putDenseWorkspace(v)
+
+	u := getDenseWorkspace(r, r, true)
+	uraw := u.RawMatrix()
+	uvec := blas64.Vector{N: n, Inc: 1, Data: uraw.Data}
+	defer putDenseWorkspace(u)
+
+	a2 := getDenseWorkspace(r, r, false)
+	defer putDenseWorkspace(a2)
+
+	n1 := Norm(a, 1)
+	for i, t := range pade {
+		if n1 > t.theta {
+			continue
+		}
+
+		// This loop only executes once, so
+		// this is not as horrible as it looks.
+		p := getDenseWorkspace(r, r, true)
+		praw := p.RawMatrix()
+		pvec := blas64.Vector{N: n, Inc: 1, Data: praw.Data}
+		defer putDenseWorkspace(p)
+
+		for k := 0; k < r; k++ {
+			p.set(k, k, 1)
+			v.set(k, k, t.b[0])
+			u.set(k, k, t.b[1])
+		}
+
+		a2.Mul(a1, a1)
+		for j := 0; j <= i; j++ {
+			p.Mul(p, a2)
+			blas64.Axpy(t.b[2*j+2], pvec, vvec)
+			blas64.Axpy(t.b[2*j+3], pvec, uvec)
+		}
+		u.Mul(a1, u)
+
+		// Use p as a workspace here and
+		// rename u for the second call's
+		// receiver.
+		vmu, vpu := u, p
+		vpu.Add(v, u)
+		vmu.Sub(v, u)
+
+		_ = m.Solve(vmu, vpu)
+		return
+	}
+
+	// Remaining Padé table line.
+	const theta13 = 5.4
+	b := [...]float64{
+		64764752532480000, 32382376266240000, 7771770303897600, 1187353796428800,
+		129060195264000, 10559470521600, 670442572800, 33522128640,
+		1323241920, 40840800, 960960, 16380, 182, 1,
+	}
+
+	s := math.Log2(n1 / theta13)
+	if s >= 0 {
+		s = math.Ceil(s)
+		a1.Scale(1/math.Pow(2, s), a1)
+	}
+	a2.Mul(a1, a1)
+
+	i := getDenseWorkspace(r, r, true)
+	for j := 0; j < r; j++ {
+		i.set(j, j, 1)
+	}
+	iraw := i.RawMatrix()
+	ivec := blas64.Vector{N: n, Inc: 1, Data: iraw.Data}
+	defer putDenseWorkspace(i)
+
+	a2raw := a2.RawMatrix()
+	a2vec := blas64.Vector{N: n, Inc: 1, Data: a2raw.Data}
+
+	a4 := getDenseWorkspace(r, r, false)
+	a4raw := a4.RawMatrix()
+	a4vec := blas64.Vector{N: n, Inc: 1, Data: a4raw.Data}
+	defer putDenseWorkspace(a4)
+	a4.Mul(a2, a2)
+
+	a6 := getDenseWorkspace(r, r, false)
+	a6raw := a6.RawMatrix()
+	a6vec := blas64.Vector{N: n, Inc: 1, Data: a6raw.Data}
+	defer putDenseWorkspace(a6)
+	a6.Mul(a2, a4)
+
+	// V = A_6(b_12*A_6 + b_10*A_4 + b_8*A_2) + b_6*A_6 + b_4*A_4 + b_2*A_2 + b_0*I
+	blas64.Axpy(b[12], a6vec, vvec)
+	blas64.Axpy(b[10], a4vec, vvec)
+	blas64.Axpy(b[8], a2vec, vvec)
+	v.Mul(v, a6)
+	blas64.Axpy(b[6], a6vec, vvec)
+	blas64.Axpy(b[4], a4vec, vvec)
+	blas64.Axpy(b[2], a2vec, vvec)
+	blas64.Axpy(b[0], ivec, vvec)
+
+	// U = A(A_6(b_13*A_6 + b_11*A_4 + b_9*A_2) + b_7*A_6 + b_5*A_4 + b_3*A_2 + b_1*I)
+	blas64.Axpy(b[13], a6vec, uvec)
+	blas64.Axpy(b[11], a4vec, uvec)
+	blas64.Axpy(b[9], a2vec, uvec)
+	u.Mul(u, a6)
+	blas64.Axpy(b[7], a6vec, uvec)
+	blas64.Axpy(b[5], a4vec, uvec)
+	blas64.Axpy(b[3], a2vec, uvec)
+	blas64.Axpy(b[1], ivec, uvec)
+	u.Mul(u, a1)
+
+	// Use i as a workspace here and
+	// rename u for the second call's
+	// receiver. 
+ vmu, vpu := u, i + vpu.Add(v, u) + vmu.Sub(v, u) + + _ = m.Solve(vmu, vpu) + + for ; s > 0; s-- { + m.Mul(m, m) + } +} + +// Pow calculates the integral power of the matrix a to n, placing the result +// in the receiver. Pow will panic if n is negative or if a is not square. +func (m *Dense) Pow(a Matrix, n int) { + if n < 0 { + panic("mat: illegal power") + } + r, c := a.Dims() + if r != c { + panic(ErrShape) + } + + m.reuseAsNonZeroed(r, c) + + // Take possible fast paths. + switch n { + case 0: + for i := 0; i < r; i++ { + zero(m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c]) + m.mat.Data[i*m.mat.Stride+i] = 1 + } + return + case 1: + m.Copy(a) + return + case 2: + m.Mul(a, a) + return + } + + // Perform iterative exponentiation by squaring in work space. + w := getDenseWorkspace(r, r, false) + w.Copy(a) + s := getDenseWorkspace(r, r, false) + s.Copy(a) + x := getDenseWorkspace(r, r, false) + for n--; n > 0; n >>= 1 { + if n&1 != 0 { + x.Mul(w, s) + w, x = x, w + } + if n != 1 { + x.Mul(s, s) + s, x = x, s + } + } + m.Copy(w) + putDenseWorkspace(w) + putDenseWorkspace(s) + putDenseWorkspace(x) +} + +// Kronecker calculates the Kronecker product of a and b, placing the result in +// the receiver. +func (m *Dense) Kronecker(a, b Matrix) { + ra, ca := a.Dims() + rb, cb := b.Dims() + + m.reuseAsNonZeroed(ra*rb, ca*cb) + for i := 0; i < ra; i++ { + for j := 0; j < ca; j++ { + m.slice(i*rb, (i+1)*rb, j*cb, (j+1)*cb).Scale(a.At(i, j), b) + } + } +} + +// Scale multiplies the elements of a by f, placing the result in the receiver. +// +// See the Scaler interface for more information. +func (m *Dense) Scale(f float64, a Matrix) { + ar, ac := a.Dims() + + m.reuseAsNonZeroed(ar, ac) + + aU, aTrans := untransposeExtract(a) + if rm, ok := aU.(*Dense); ok { + amat := rm.mat + if m == aU || m.checkOverlap(amat) { + var restore func() + m, restore = m.isolatedWorkspace(a) + defer restore() + } + if !aTrans { + for ja, jm := 0, 0; ja < ar*amat.Stride; ja, jm = ja+amat.Stride, jm+m.mat.Stride { + for i, v := range amat.Data[ja : ja+ac] { + m.mat.Data[i+jm] = v * f + } + } + } else { + for ja, jm := 0, 0; ja < ac*amat.Stride; ja, jm = ja+amat.Stride, jm+1 { + for i, v := range amat.Data[ja : ja+ar] { + m.mat.Data[i*m.mat.Stride+jm] = v * f + } + } + } + return + } + + m.checkOverlapMatrix(a) + for r := 0; r < ar; r++ { + for c := 0; c < ac; c++ { + m.set(r, c, f*a.At(r, c)) + } + } +} + +// Apply applies the function fn to each of the elements of a, placing the +// resulting matrix in the receiver. The function fn takes a row/column +// index and element value and returns some function of that tuple. 
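+//
+// For example, a minimal sketch that clips every element to be non-negative
+// (the names a and clipped are assumed for illustration, not part of this
+// package):
+//
+//	var clipped mat.Dense
+//	clipped.Apply(func(_, _ int, v float64) float64 {
+//		return math.Max(0, v)
+//	}, a)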
+func (m *Dense) Apply(fn func(i, j int, v float64) float64, a Matrix) { + ar, ac := a.Dims() + + m.reuseAsNonZeroed(ar, ac) + + aU, aTrans := untransposeExtract(a) + if rm, ok := aU.(*Dense); ok { + amat := rm.mat + if m == aU || m.checkOverlap(amat) { + var restore func() + m, restore = m.isolatedWorkspace(a) + defer restore() + } + if !aTrans { + for j, ja, jm := 0, 0, 0; ja < ar*amat.Stride; j, ja, jm = j+1, ja+amat.Stride, jm+m.mat.Stride { + for i, v := range amat.Data[ja : ja+ac] { + m.mat.Data[i+jm] = fn(j, i, v) + } + } + } else { + for j, ja, jm := 0, 0, 0; ja < ac*amat.Stride; j, ja, jm = j+1, ja+amat.Stride, jm+1 { + for i, v := range amat.Data[ja : ja+ar] { + m.mat.Data[i*m.mat.Stride+jm] = fn(i, j, v) + } + } + } + return + } + + m.checkOverlapMatrix(a) + for r := 0; r < ar; r++ { + for c := 0; c < ac; c++ { + m.set(r, c, fn(r, c, a.At(r, c))) + } + } +} + +// RankOne performs a rank-one update to the matrix a with the vectors x and +// y, where x and y are treated as column vectors. The result is stored in the +// receiver. The Outer method can be used instead of RankOne if a is not needed. +// +// m = a + alpha * x * yᵀ +func (m *Dense) RankOne(a Matrix, alpha float64, x, y Vector) { + ar, ac := a.Dims() + if x.Len() != ar { + panic(ErrShape) + } + if y.Len() != ac { + panic(ErrShape) + } + + if a != m { + aU, _ := untransposeExtract(a) + if rm, ok := aU.(*Dense); ok { + m.checkOverlap(rm.RawMatrix()) + } + } + + var xmat, ymat blas64.Vector + fast := true + xU, _ := untransposeExtract(x) + if rv, ok := xU.(*VecDense); ok { + r, c := xU.Dims() + xmat = rv.mat + m.checkOverlap(generalFromVector(xmat, r, c)) + } else { + fast = false + } + yU, _ := untransposeExtract(y) + if rv, ok := yU.(*VecDense); ok { + r, c := yU.Dims() + ymat = rv.mat + m.checkOverlap(generalFromVector(ymat, r, c)) + } else { + fast = false + } + + if fast { + if m != a { + m.reuseAsNonZeroed(ar, ac) + m.Copy(a) + } + blas64.Ger(alpha, xmat, ymat, m.mat) + return + } + + m.reuseAsNonZeroed(ar, ac) + for i := 0; i < ar; i++ { + for j := 0; j < ac; j++ { + m.set(i, j, a.At(i, j)+alpha*x.AtVec(i)*y.AtVec(j)) + } + } +} + +// Outer calculates the outer product of the vectors x and y, where x and y +// are treated as column vectors, and stores the result in the receiver. +// +// m = alpha * x * yᵀ +// +// In order to update an existing matrix, see RankOne. +func (m *Dense) Outer(alpha float64, x, y Vector) { + r, c := x.Len(), y.Len() + + m.reuseAsZeroed(r, c) + + var xmat, ymat blas64.Vector + fast := true + xU, _ := untransposeExtract(x) + if rv, ok := xU.(*VecDense); ok { + r, c := xU.Dims() + xmat = rv.mat + m.checkOverlap(generalFromVector(xmat, r, c)) + } else { + fast = false + } + yU, _ := untransposeExtract(y) + if rv, ok := yU.(*VecDense); ok { + r, c := yU.Dims() + ymat = rv.mat + m.checkOverlap(generalFromVector(ymat, r, c)) + } else { + fast = false + } + + if fast { + for i := 0; i < r; i++ { + zero(m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c]) + } + blas64.Ger(alpha, xmat, ymat, m.mat) + return + } + + for i := 0; i < r; i++ { + for j := 0; j < c; j++ { + m.set(i, j, alpha*x.AtVec(i)*y.AtVec(j)) + } + } +} diff --git a/vendor/gonum.org/v1/gonum/mat/diagonal.go b/vendor/gonum.org/v1/gonum/mat/diagonal.go new file mode 100644 index 0000000000..c42f70c831 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/diagonal.go @@ -0,0 +1,342 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" +) + +var ( + diagDense *DiagDense + _ Matrix = diagDense + _ allMatrix = diagDense + _ denseMatrix = diagDense + _ Diagonal = diagDense + _ MutableDiagonal = diagDense + _ Triangular = diagDense + _ TriBanded = diagDense + _ Symmetric = diagDense + _ SymBanded = diagDense + _ Banded = diagDense + _ RawBander = diagDense + _ RawSymBander = diagDense + + diag Diagonal + _ Matrix = diag + _ Diagonal = diag + _ Triangular = diag + _ TriBanded = diag + _ Symmetric = diag + _ SymBanded = diag + _ Banded = diag +) + +// Diagonal represents a diagonal matrix, that is a square matrix that only +// has non-zero terms on the diagonal. +type Diagonal interface { + Matrix + // Diag returns the number of rows/columns in the matrix. + Diag() int + + // The following interfaces are included in the Diagonal + // interface to allow the use of Diagonal types in + // functions operating on these types. + Banded + SymBanded + Symmetric + Triangular + TriBanded +} + +// MutableDiagonal is a Diagonal matrix whose elements can be set. +type MutableDiagonal interface { + Diagonal + SetDiag(i int, v float64) +} + +// DiagDense represents a diagonal matrix in dense storage format. +type DiagDense struct { + mat blas64.Vector +} + +// NewDiagDense creates a new Diagonal matrix with n rows and n columns. +// The length of data must be n or data must be nil, otherwise NewDiagDense +// will panic. NewDiagDense will panic if n is zero. +func NewDiagDense(n int, data []float64) *DiagDense { + if n <= 0 { + if n == 0 { + panic(ErrZeroLength) + } + panic("mat: negative dimension") + } + if data == nil { + data = make([]float64, n) + } + if len(data) != n { + panic(ErrShape) + } + return &DiagDense{ + mat: blas64.Vector{N: n, Data: data, Inc: 1}, + } +} + +// Diag returns the dimension of the receiver. +func (d *DiagDense) Diag() int { + return d.mat.N +} + +// Dims returns the dimensions of the matrix. +func (d *DiagDense) Dims() (r, c int) { + return d.mat.N, d.mat.N +} + +// T returns the transpose of the matrix. +func (d *DiagDense) T() Matrix { + return d +} + +// TTri returns the transpose of the matrix. Note that Diagonal matrices are +// Upper by default. +func (d *DiagDense) TTri() Triangular { + return TransposeTri{d} +} + +// TBand performs an implicit transpose by returning the receiver inside a +// TransposeBand. +func (d *DiagDense) TBand() Banded { + return TransposeBand{d} +} + +// TTriBand performs an implicit transpose by returning the receiver inside a +// TransposeTriBand. Note that Diagonal matrices are Upper by default. +func (d *DiagDense) TTriBand() TriBanded { + return TransposeTriBand{d} +} + +// Bandwidth returns the upper and lower bandwidths of the matrix. +// These values are always zero for diagonal matrices. +func (d *DiagDense) Bandwidth() (kl, ku int) { + return 0, 0 +} + +// SymmetricDim implements the Symmetric interface. +func (d *DiagDense) SymmetricDim() int { + return d.mat.N +} + +// SymBand returns the number of rows/columns in the matrix, and the size of +// the bandwidth. +func (d *DiagDense) SymBand() (n, k int) { + return d.mat.N, 0 +} + +// Triangle implements the Triangular interface. +func (d *DiagDense) Triangle() (int, TriKind) { + return d.mat.N, Upper +} + +// TriBand returns the number of rows/columns in the matrix, the +// size of the bandwidth, and the orientation. 
Note that Diagonal matrices are +// Upper by default. +func (d *DiagDense) TriBand() (n, k int, kind TriKind) { + return d.mat.N, 0, Upper +} + +// Reset empties the matrix so that it can be reused as the +// receiver of a dimensionally restricted operation. +// +// Reset should not be used when the matrix shares backing data. +// See the Reseter interface for more information. +func (d *DiagDense) Reset() { + // No change of Inc or n to 0 may be + // made unless both are set to 0. + d.mat.Inc = 0 + d.mat.N = 0 + d.mat.Data = d.mat.Data[:0] +} + +// Zero sets all of the matrix elements to zero. +func (d *DiagDense) Zero() { + for i := 0; i < d.mat.N; i++ { + d.mat.Data[d.mat.Inc*i] = 0 + } +} + +// DiagView returns the diagonal as a matrix backed by the original data. +func (d *DiagDense) DiagView() Diagonal { + return d +} + +// DiagFrom copies the diagonal of m into the receiver. The receiver must +// be min(r, c) long or empty, otherwise DiagFrom will panic. +func (d *DiagDense) DiagFrom(m Matrix) { + n := min(m.Dims()) + d.reuseAsNonZeroed(n) + + var vec blas64.Vector + switch r := m.(type) { + case *DiagDense: + vec = r.mat + case RawBander: + mat := r.RawBand() + vec = blas64.Vector{ + N: n, + Inc: mat.Stride, + Data: mat.Data[mat.KL : (n-1)*mat.Stride+mat.KL+1], + } + case RawMatrixer: + mat := r.RawMatrix() + vec = blas64.Vector{ + N: n, + Inc: mat.Stride + 1, + Data: mat.Data[:(n-1)*mat.Stride+n], + } + case RawSymBander: + mat := r.RawSymBand() + vec = blas64.Vector{ + N: n, + Inc: mat.Stride, + Data: mat.Data[:(n-1)*mat.Stride+1], + } + case RawSymmetricer: + mat := r.RawSymmetric() + vec = blas64.Vector{ + N: n, + Inc: mat.Stride + 1, + Data: mat.Data[:(n-1)*mat.Stride+n], + } + case RawTriBander: + mat := r.RawTriBand() + data := mat.Data + if mat.Uplo == blas.Lower { + data = data[mat.K:] + } + vec = blas64.Vector{ + N: n, + Inc: mat.Stride, + Data: data[:(n-1)*mat.Stride+1], + } + case RawTriangular: + mat := r.RawTriangular() + if mat.Diag == blas.Unit { + for i := 0; i < n; i += d.mat.Inc { + d.mat.Data[i] = 1 + } + return + } + vec = blas64.Vector{ + N: n, + Inc: mat.Stride + 1, + Data: mat.Data[:(n-1)*mat.Stride+n], + } + case RawVectorer: + d.mat.Data[0] = r.RawVector().Data[0] + return + default: + for i := 0; i < n; i++ { + d.setDiag(i, m.At(i, i)) + } + return + } + blas64.Copy(vec, d.mat) +} + +// RawBand returns the underlying data used by the receiver represented +// as a blas64.Band. +// Changes to elements in the receiver following the call will be reflected +// in returned blas64.Band. +func (d *DiagDense) RawBand() blas64.Band { + return blas64.Band{ + Rows: d.mat.N, + Cols: d.mat.N, + KL: 0, + KU: 0, + Stride: d.mat.Inc, + Data: d.mat.Data, + } +} + +// RawSymBand returns the underlying data used by the receiver represented +// as a blas64.SymmetricBand. +// Changes to elements in the receiver following the call will be reflected +// in returned blas64.Band. +func (d *DiagDense) RawSymBand() blas64.SymmetricBand { + return blas64.SymmetricBand{ + N: d.mat.N, + K: 0, + Stride: d.mat.Inc, + Uplo: blas.Upper, + Data: d.mat.Data, + } +} + +// reuseAsNonZeroed resizes an empty diagonal to a r×r diagonal, +// or checks that a non-empty matrix is r×r. +func (d *DiagDense) reuseAsNonZeroed(r int) { + if r == 0 { + panic(ErrZeroLength) + } + if d.IsEmpty() { + d.mat = blas64.Vector{ + Inc: 1, + Data: use(d.mat.Data, r), + } + d.mat.N = r + return + } + if r != d.mat.N { + panic(ErrShape) + } +} + +// IsEmpty returns whether the receiver is empty. 
Empty matrices can be the +// receiver for size-restricted operations. The receiver can be emptied using +// Reset. +func (d *DiagDense) IsEmpty() bool { + // It must be the case that d.Dims() returns + // zeros in this case. See comment in Reset(). + return d.mat.Inc == 0 +} + +// Trace returns the trace of the matrix. +// +// Trace will panic with ErrZeroLength if the matrix has zero size. +func (d *DiagDense) Trace() float64 { + if d.IsEmpty() { + panic(ErrZeroLength) + } + rb := d.RawBand() + var tr float64 + for i := 0; i < rb.Rows; i++ { + tr += rb.Data[rb.KL+i*rb.Stride] + } + return tr +} + +// Norm returns the specified norm of the receiver. Valid norms are: +// +// 1 or Inf - The maximum diagonal element magnitude +// 2 - The Frobenius norm, the square root of the sum of the squares of +// the diagonal elements +// +// Norm will panic with ErrNormOrder if an illegal norm is specified and with +// ErrZeroLength if the receiver has zero size. +func (d *DiagDense) Norm(norm float64) float64 { + if d.IsEmpty() { + panic(ErrZeroLength) + } + switch norm { + default: + panic(ErrNormOrder) + case 1, math.Inf(1): + imax := blas64.Iamax(d.mat) + return math.Abs(d.at(imax, imax)) + case 2: + return blas64.Nrm2(d.mat) + } +} diff --git a/vendor/gonum.org/v1/gonum/mat/doc.go b/vendor/gonum.org/v1/gonum/mat/doc.go new file mode 100644 index 0000000000..f8c078cfef --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/doc.go @@ -0,0 +1,200 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package mat provides implementations of float64 and complex128 matrix +// structures and linear algebra operations on them. +// +// # Overview +// +// This section provides a quick overview of the mat package. The following +// sections provide more in depth commentary. +// +// mat provides: +// - Interfaces for Matrix classes (Matrix, Symmetric, Triangular) +// - Concrete implementations (Dense, SymDense, TriDense, VecDense) +// - Methods and functions for using matrix data (Add, Trace, SymRankOne) +// - Types for constructing and using matrix factorizations (QR, LU, etc.) +// - The complementary types for complex matrices, CMatrix, CSymDense, etc. +// +// In the documentation below, we use "matrix" as a short-hand for all of +// the FooDense types implemented in this package. We use "Matrix" to +// refer to the Matrix interface. +// +// A matrix may be constructed through the corresponding New function. If no +// backing array is provided the matrix will be initialized to all zeros. +// +// // Allocate a zeroed real matrix of size 3×5 +// zero := mat.NewDense(3, 5, nil) +// +// If a backing data slice is provided, the matrix will have those elements. +// All matrices are stored in row-major format and users should consider +// this when expressing matrix arithmetic to ensure optimal performance. +// +// // Generate a 6×6 matrix of random values. +// data := make([]float64, 36) +// for i := range data { +// data[i] = rand.NormFloat64() +// } +// a := mat.NewDense(6, 6, data) +// +// Operations involving matrix data are implemented as functions when the values +// of the matrix remain unchanged +// +// tr := mat.Trace(a) +// +// and are implemented as methods when the operation modifies the receiver. 
+//
+//	zero.Copy(a)
+//
+// Note that the input arguments to most functions and methods are interfaces
+// rather than concrete types (`func Trace(Matrix)` rather than
+// `func Trace(*Dense)`), allowing flexible use of internal and external
+// Matrix types.
+//
+// When a matrix is the destination or receiver for a function or method,
+// the operation will panic if the matrix is not the correct size.
+// An exception to this is when the destination is empty (see below).
+//
+// # Empty matrix
+//
+// An empty matrix is one that has zero size. Empty matrices are used to allow
+// the destination of a matrix operation to assume the correct size automatically.
+// This operation will re-use the backing data, if available, or will allocate
+// new data if necessary. The IsEmpty method returns whether the given matrix
+// is empty. The zero-value of a matrix is empty, and is useful for easily
+// getting the result of matrix operations.
+//
+//	var c mat.Dense // construct a new zero-value matrix
+//	c.Mul(a, a)     // c is automatically adjusted to be the right size
+//
+// The Reset method can be used to revert a matrix to an empty matrix.
+// Reset should not be used when multiple different matrices share the same backing
+// data slice. This can cause unexpected data modifications after being resized.
+// An empty matrix can not be sliced even if it does have an adequately sized
+// backing data slice, but can be expanded using its Grow method if it exists.
+//
+// # The Matrix Interfaces
+//
+// The Matrix interface is the common link between the concrete types of real
+// matrices. The Matrix interface is defined by three functions: Dims, which
+// returns the dimensions of the Matrix, At, which returns the element in the
+// specified location, and T for returning a Transpose (discussed later). All of
+// the matrix types can perform these behaviors and so implement the interface.
+// Methods and functions are designed to use this interface, so in particular the method
+//
+//	func (m *Dense) Mul(a, b Matrix)
+//
+// constructs a *Dense from the result of a multiplication with any Matrix types,
+// not just *Dense. Where more restrictive requirements must be met, there are also
+// additional interfaces like Symmetric and Triangular. For example, in
+//
+//	func (s *SymDense) AddSym(a, b Symmetric)
+//
+// the Symmetric interface guarantees a symmetric result.
+//
+// The CMatrix interface plays the same role for complex matrices. The difference
+// is that the CMatrix type has the H method instead of T, for returning the
+// conjugate transpose.
+//
+// # (Conjugate) Transposes
+//
+// The T method is used for transposition on real matrices, and H is used for
+// conjugate transposition on complex matrices. For example, c.Mul(a.T(), b) computes
+// c = aᵀ * b. The mat types implement this method implicitly —
+// see the Transpose and Conjugate types for more details. Note that some
+// operations have a transpose as part of their definition, as in *SymDense.SymOuterK.
+//
+// # Matrix Factorization
+//
+// Matrix factorizations, such as the LU decomposition, typically have their own
+// specific data storage, and so are each implemented as a specific type. The
+// factorization can be computed through a call to Factorize
+//
+//	var lu mat.LU
+//	lu.Factorize(a)
+//
+// The elements of the factorization can be extracted through methods on the
+// factorized type, for example *LU.UTo. The factorization types can also be used
+// directly, as in *Cholesky.SolveTo.
Some factorizations can be updated directly, +// without needing to update the original matrix and refactorize, for example with +// *LU.RankOne. +// +// # BLAS and LAPACK +// +// BLAS and LAPACK are the standard APIs for linear algebra routines. Many +// operations in mat are implemented using calls to the wrapper functions +// in gonum/blas/blas64 and gonum/lapack/lapack64 and their complex equivalents. +// By default, blas64 and lapack64 call the native Go implementations of the +// routines. Alternatively, it is possible to use C-based implementations of the +// APIs through the respective cgo packages and the wrapper packages' "Use" +// functions. The Go implementation of LAPACK makes calls through blas64, so if +// a cgo BLAS implementation is registered, the lapack64 calls will be partially +// executed in Go and partially executed in C. +// +// # Type Switching +// +// The Matrix abstraction enables efficiency as well as interoperability. Go's +// type reflection capabilities are used to choose the most efficient routine +// given the specific concrete types. For example, in +// +// c.Mul(a, b) +// +// if a and b both implement RawMatrixer, that is, they can be represented as a +// blas64.General, blas64.Gemm (general matrix multiplication) is called, while +// instead if b is a RawSymmetricer blas64.Symm is used (general-symmetric +// multiplication), and if b is a *VecDense blas64.Gemv is used. +// +// There are many possible type combinations and special cases. No specific guarantees +// are made about the performance of any method, and in particular, note that an +// abstract matrix type may be copied into a concrete type of the corresponding +// value. If there are specific special cases that are needed, please submit a +// pull-request or file an issue. +// +// # Invariants +// +// Matrix input arguments to package functions are never directly modified. If an +// operation changes Matrix data, the mutated matrix will be the receiver of a +// method, or will be the first, dst, argument to a method named with a To suffix. +// +// For convenience, a matrix may be used as both a receiver and as an input, e.g. +// +// a.Pow(a, 6) +// v.SolveVec(a.T(), v) +// +// though in many cases this will cause an allocation (see Element Aliasing). +// An exception to this rule is Copy, which does not allow a.Copy(a.T()). +// +// # Element Aliasing +// +// Most methods in mat modify receiver data. It is forbidden for the modified +// data region of the receiver to overlap the used data area of the input +// arguments. The exception to this rule is when the method receiver is equal to one +// of the input arguments, as in the a.Pow(a, 6) call above, or its implicit transpose. +// +// This prohibition is to help avoid subtle mistakes when the method needs to read +// from and write to the same data region. There are ways to make mistakes using the +// mat API, and mat functions will detect and complain about those. +// There are many ways to make mistakes by excursion from the mat API via +// interaction with raw matrix values. +// +// If you need to read the rest of this section to understand the behavior of +// your program, you are being clever. Don't be clever. If you must be clever, +// blas64 and lapack64 may be used to call the behavior directly. 
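+//
+// For example, continuing with the 6×6 matrix a constructed in the Overview
+// above (a sketch), two views of a that share columns alias each other, and
+// using one as the destination for an operation on the other is detected:
+//
+//	b := a.Slice(0, 6, 0, 5).(*mat.Dense)
+//	c := a.Slice(0, 6, 1, 6).(*mat.Dense)
+//	c.Scale(2, b) // panics: the used regions of b and c overlap.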
+//
+// mat will use the following rules to detect overlap between the receiver and one
+// of the inputs:
+//  - the input implements one of the Raw methods, and
+//  - the address ranges of the backing data slices overlap, and
+//  - the strides differ or there is an overlap in the used data elements.
+//
+// If such an overlap is detected, the method will panic.
+//
+// The following cases will not panic:
+//  - the data slices do not overlap,
+//  - there is pointer identity between the receiver and input values after
+//    the value has been untransposed if necessary.
+//
+// mat will not attempt to detect element overlap if the input does not implement a
+// Raw method. Method behavior is undefined if there is undetected overlap.
+package mat // import "gonum.org/v1/gonum/mat"
diff --git a/vendor/gonum.org/v1/gonum/mat/eigen.go b/vendor/gonum.org/v1/gonum/mat/eigen.go
new file mode 100644
index 0000000000..859247d880
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mat/eigen.go
@@ -0,0 +1,450 @@
+// Copyright ©2013 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mat
+
+import (
+	"gonum.org/v1/gonum/lapack"
+	"gonum.org/v1/gonum/lapack/lapack64"
+)
+
+const (
+	badFact   = "mat: use without successful factorization"
+	noVectors = "mat: eigenvectors not computed"
+)
+
+// EigenSym is a type for computing all eigenvalues and, optionally,
+// eigenvectors of a symmetric matrix A.
+//
+// It is a Symmetric matrix represented by its spectral factorization. Once
+// computed, this representation is useful for extracting eigenvalues and
+// eigenvectors, but At is slow.
+type EigenSym struct {
+	vectorsComputed bool
+
+	values  []float64
+	vectors *Dense
+}
+
+// Dims returns the dimensions of the matrix.
+func (e *EigenSym) Dims() (r, c int) {
+	n := e.SymmetricDim()
+	return n, n
+}
+
+// SymmetricDim implements the Symmetric interface.
+func (e *EigenSym) SymmetricDim() int {
+	return len(e.values)
+}
+
+// At returns the element at row i, column j of the matrix A.
+//
+// At will panic if the eigenvectors have not been computed.
+func (e *EigenSym) At(i, j int) float64 {
+	if !e.vectorsComputed {
+		panic(noVectors)
+	}
+	n, _ := e.Dims()
+	if uint(i) >= uint(n) {
+		panic(ErrRowAccess)
+	}
+	if uint(j) >= uint(n) {
+		panic(ErrColAccess)
+	}
+
+	var val float64
+	for k := 0; k < n; k++ {
+		val += e.values[k] * e.vectors.at(i, k) * e.vectors.at(j, k)
+	}
+	return val
+}
+
+// T returns the receiver, the transpose of a symmetric matrix.
+func (e *EigenSym) T() Matrix {
+	return e
+}
+
+// Factorize computes the spectral factorization (eigendecomposition) of the
+// symmetric matrix A.
+//
+// The spectral factorization of A can be written as
+//
+//	A = Q * Λ * Qᵀ
+//
+// where Λ is a diagonal matrix whose entries are the eigenvalues, and Q is an
+// orthogonal matrix whose columns are the eigenvectors.
+//
+// If vectors is false, the eigenvectors are not computed and later calls to
+// VectorsTo and At will panic.
+//
+// Factorize returns whether the factorization succeeded. If it returns false,
+// methods that require a successful factorization will panic.
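+//
+// A minimal usage sketch (s is an assumed Symmetric matrix):
+//
+//	var es mat.EigenSym
+//	if ok := es.Factorize(s, true); !ok {
+//		// handle the failed factorization
+//	}
+//	vals := es.Values(nil) // eigenvalues in ascending order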
+func (e *EigenSym) Factorize(a Symmetric, vectors bool) (ok bool) {
+	// kill previous decomposition
+	e.vectorsComputed = false
+	e.values = e.values[:0]
+
+	n := a.SymmetricDim()
+	sd := NewSymDense(n, nil)
+	sd.CopySym(a)
+
+	jobz := lapack.EVNone
+	if vectors {
+		jobz = lapack.EVCompute
+	}
+	w := make([]float64, n)
+	work := []float64{0}
+	lapack64.Syev(jobz, sd.mat, w, work, -1)
+
+	work = getFloat64s(int(work[0]), false)
+	ok = lapack64.Syev(jobz, sd.mat, w, work, len(work))
+	putFloat64s(work)
+	if !ok {
+		e.vectorsComputed = false
+		e.values = nil
+		e.vectors = nil
+		return false
+	}
+	e.vectorsComputed = vectors
+	e.values = w
+	e.vectors = NewDense(n, n, sd.mat.Data)
+	return true
+}
+
+// succFact returns whether the receiver contains a successful factorization.
+func (e *EigenSym) succFact() bool {
+	return len(e.values) != 0
+}
+
+// Values extracts the eigenvalues of the factorized n×n matrix A in ascending
+// order.
+//
+// If dst is not nil, the values are stored in-place into dst and returned,
+// otherwise a new slice is allocated first. If dst is not nil, it must have
+// length equal to n.
+//
+// If the receiver does not contain a successful factorization, Values will
+// panic.
+func (e *EigenSym) Values(dst []float64) []float64 {
+	if !e.succFact() {
+		panic(badFact)
+	}
+	if dst == nil {
+		dst = make([]float64, len(e.values))
+	}
+	if len(dst) != len(e.values) {
+		panic(ErrSliceLengthMismatch)
+	}
+	copy(dst, e.values)
+	return dst
+}
+
+// RawValues returns the slice storing the eigenvalues of A in ascending order.
+//
+// If the returned slice is modified, the factorization is invalid and should
+// not be used.
+//
+// If the receiver does not contain a successful factorization, RawValues will
+// return nil.
+func (e *EigenSym) RawValues() []float64 {
+	if !e.succFact() {
+		return nil
+	}
+	return e.values
+}
+
+// VectorsTo stores the orthonormal eigenvectors of the factorized n×n matrix A
+// into the columns of dst.
+//
+// If dst is empty, VectorsTo will resize dst to be n×n. When dst is non-empty,
+// VectorsTo will panic if dst is not n×n. VectorsTo will also panic if the
+// eigenvectors were not computed during the factorization, or if the receiver
+// does not contain a successful factorization.
+func (e *EigenSym) VectorsTo(dst *Dense) {
+	if !e.succFact() {
+		panic(badFact)
+	}
+	if !e.vectorsComputed {
+		panic(noVectors)
+	}
+	r, c := e.vectors.Dims()
+	if dst.IsEmpty() {
+		dst.ReuseAs(r, c)
+	} else {
+		r2, c2 := dst.Dims()
+		if r != r2 || c != c2 {
+			panic(ErrShape)
+		}
+	}
+	dst.Copy(e.vectors)
+}
+
+// RawQ returns the orthogonal matrix Q from the spectral factorization of the
+// original matrix A
+//
+//	A = Q * Λ * Qᵀ
+//
+// The columns of Q contain the eigenvectors of A.
+//
+// If the returned matrix is modified, the factorization is invalid and should
+// not be used.
+//
+// If the receiver does not contain a successful factorization or the
+// eigenvectors were not computed, RawQ will return nil.
+func (e *EigenSym) RawQ() Matrix {
+	if !e.succFact() || !e.vectorsComputed {
+		return nil
+	}
+	return e.vectors
+}
+
+// EigenKind specifies the computation of eigenvectors during factorization.
+type EigenKind int
+
+const (
+	// EigenNone specifies to not compute any eigenvectors.
+	EigenNone EigenKind = 0
+	// EigenLeft specifies to compute the left eigenvectors.
+	EigenLeft EigenKind = 1 << iota
+	// EigenRight specifies to compute the right eigenvectors.
+	EigenRight
+	// EigenBoth is a convenience value for computing both eigenvectors.
+	EigenBoth EigenKind = EigenLeft | EigenRight
+)
+
+// Eigen is a type for creating and using the eigenvalue decomposition of a dense matrix.
+type Eigen struct {
+	n int // The size of the factorized matrix.
+
+	kind EigenKind
+
+	values   []complex128
+	rVectors *CDense
+	lVectors *CDense
+}
+
+// succFact returns whether the receiver contains a successful factorization.
+func (e *Eigen) succFact() bool {
+	return e.n != 0
+}
+
+// Factorize computes the eigenvalues of the square matrix a, and optionally
+// the eigenvectors.
+//
+// A right eigenvalue/eigenvector combination is defined by
+//
+//	A * x_r = λ * x_r
+//
+// where x_r is the column vector called an eigenvector, and λ is the corresponding
+// eigenvalue.
+//
+// Similarly, a left eigenvalue/eigenvector combination is defined by
+//
+//	x_l * A = λ * x_l
+//
+// The eigenvalues, but not the eigenvectors, are the same for both decompositions.
+//
+// Typically eigenvectors refer to right eigenvectors.
+//
+// In all cases, Factorize computes the eigenvalues of the matrix. kind
+// specifies which of the eigenvectors, if any, to compute. See the EigenKind
+// documentation for more information.
+// Factorize panics if the input matrix is not square.
+//
+// Factorize returns whether the decomposition succeeded. If the decomposition
+// failed, methods that require a successful factorization will panic.
+func (e *Eigen) Factorize(a Matrix, kind EigenKind) (ok bool) {
+	// kill previous factorization.
+	e.n = 0
+	e.kind = 0
+	// Copy a because it is modified during the Lapack call.
+	r, c := a.Dims()
+	if r != c {
+		panic(ErrShape)
+	}
+	var sd Dense
+	sd.CloneFrom(a)
+
+	left := kind&EigenLeft != 0
+	right := kind&EigenRight != 0
+
+	var vl, vr Dense
+	jobvl := lapack.LeftEVNone
+	jobvr := lapack.RightEVNone
+	if left {
+		vl = *NewDense(r, r, nil)
+		jobvl = lapack.LeftEVCompute
+	}
+	if right {
+		vr = *NewDense(c, c, nil)
+		jobvr = lapack.RightEVCompute
+	}
+
+	wr := getFloat64s(c, false)
+	defer putFloat64s(wr)
+	wi := getFloat64s(c, false)
+	defer putFloat64s(wi)
+
+	work := []float64{0}
+	lapack64.Geev(jobvl, jobvr, sd.mat, wr, wi, vl.mat, vr.mat, work, -1)
+	work = getFloat64s(int(work[0]), false)
+	first := lapack64.Geev(jobvl, jobvr, sd.mat, wr, wi, vl.mat, vr.mat, work, len(work))
+	putFloat64s(work)
+
+	if first != 0 {
+		e.values = nil
+		return false
+	}
+	e.n = r
+	e.kind = kind
+
+	// Construct complex eigenvalues from float64 data.
+	values := make([]complex128, r)
+	for i, v := range wr {
+		values[i] = complex(v, wi[i])
+	}
+	e.values = values
+
+	// Construct complex eigenvectors from float64 data.
+	var cvl, cvr CDense
+	if left {
+		cvl = *NewCDense(r, r, nil)
+		e.complexEigenTo(&cvl, &vl)
+		e.lVectors = &cvl
+	} else {
+		e.lVectors = nil
+	}
+	if right {
+		cvr = *NewCDense(c, c, nil)
+		e.complexEigenTo(&cvr, &vr)
+		e.rVectors = &cvr
+	} else {
+		e.rVectors = nil
+	}
+	return true
+}
+
+// Kind returns the EigenKind of the decomposition. If no decomposition has been
+// computed, Kind returns -1.
+func (e *Eigen) Kind() EigenKind {
+	if !e.succFact() {
+		return -1
+	}
+	return e.kind
+}
+
+// Values extracts the eigenvalues of the factorized matrix. If dst is
+// non-nil, the values are stored in-place into dst. In this case
+// dst must have length n, otherwise Values will panic. If dst is
+// nil, then a new slice will be allocated of the proper length and
+// filled with the eigenvalues.
+//
+// Values panics if the Eigen decomposition was not successful.
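+//
+// A minimal sketch (a is an assumed square *Dense):
+//
+//	var eig mat.Eigen
+//	if ok := eig.Factorize(a, mat.EigenRight); ok {
+//		vals := eig.Values(nil) // complex eigenvalues
+//		_ = vals
+//	}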
+func (e *Eigen) Values(dst []complex128) []complex128 {
+	if !e.succFact() {
+		panic(badFact)
+	}
+	if dst == nil {
+		dst = make([]complex128, e.n)
+	}
+	if len(dst) != e.n {
+		panic(ErrSliceLengthMismatch)
+	}
+	copy(dst, e.values)
+	return dst
+}
+
+// complexEigenTo extracts the complex eigenvectors from the real matrix d
+// and stores them into the complex matrix dst.
+//
+// The columns of the n×n matrix dst contain the eigenvectors of the
+// decomposition in the same order as the eigenvalues.
+// If the j-th eigenvalue is real, then
+//
+//	dst[:,j] = d[:,j],
+//
+// and if it is not real, then the elements of the j-th and (j+1)-th columns of d
+// form complex conjugate pairs and the eigenvectors are recovered as
+//
+//	dst[:,j]   = d[:,j] + i*d[:,j+1],
+//	dst[:,j+1] = d[:,j] - i*d[:,j+1],
+//
+// where i is the imaginary unit.
+func (e *Eigen) complexEigenTo(dst *CDense, d *Dense) {
+	r, c := d.Dims()
+	cr, cc := dst.Dims()
+	if r != cr {
+		panic("size mismatch")
+	}
+	if c != cc {
+		panic("size mismatch")
+	}
+	for j := 0; j < c; j++ {
+		if imag(e.values[j]) == 0 {
+			for i := 0; i < r; i++ {
+				dst.set(i, j, complex(d.at(i, j), 0))
+			}
+			continue
+		}
+		for i := 0; i < r; i++ {
+			real := d.at(i, j)
+			imag := d.at(i, j+1)
+			dst.set(i, j, complex(real, imag))
+			dst.set(i, j+1, complex(real, -imag))
+		}
+		j++
+	}
+}
+
+// VectorsTo stores the right eigenvectors of the decomposition into the columns
+// of dst. The computed eigenvectors are normalized to have Euclidean norm equal
+// to 1 and largest component real.
+//
+// If dst is empty, VectorsTo will resize dst to be n×n. When dst is
+// non-empty, VectorsTo will panic if dst is not n×n. VectorsTo will also
+// panic if the eigenvectors were not computed during the factorization,
+// or if the receiver does not contain a successful factorization.
+func (e *Eigen) VectorsTo(dst *CDense) {
+	if !e.succFact() {
+		panic(badFact)
+	}
+	if e.kind&EigenRight == 0 {
+		panic(noVectors)
+	}
+	if dst.IsEmpty() {
+		dst.ReuseAs(e.n, e.n)
+	} else {
+		r, c := dst.Dims()
+		if r != e.n || c != e.n {
+			panic(ErrShape)
+		}
+	}
+	dst.Copy(e.rVectors)
+}
+
+// LeftVectorsTo stores the left eigenvectors of the decomposition into the
+// columns of dst. The computed eigenvectors are normalized to have Euclidean
+// norm equal to 1 and largest component real.
+//
+// If dst is empty, LeftVectorsTo will resize dst to be n×n. When dst is
+// non-empty, LeftVectorsTo will panic if dst is not n×n. LeftVectorsTo will also
+// panic if the left eigenvectors were not computed during the factorization,
+// or if the receiver does not contain a successful factorization.
+func (e *Eigen) LeftVectorsTo(dst *CDense) {
+	if !e.succFact() {
+		panic(badFact)
+	}
+	if e.kind&EigenLeft == 0 {
+		panic(noVectors)
+	}
+	if dst.IsEmpty() {
+		dst.ReuseAs(e.n, e.n)
+	} else {
+		r, c := dst.Dims()
+		if r != e.n || c != e.n {
+			panic(ErrShape)
+		}
+	}
+	dst.Copy(e.lVectors)
+}
diff --git a/vendor/gonum.org/v1/gonum/mat/errors.go b/vendor/gonum.org/v1/gonum/mat/errors.go
new file mode 100644
index 0000000000..641d816219
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mat/errors.go
@@ -0,0 +1,154 @@
+// Copyright ©2013 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mat
+
+import (
+	"fmt"
+	"runtime"
+
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Condition is the condition number of a matrix. The condition
+// number is defined as |A| * |A^-1|.
+// +// One important use of Condition is during linear solve routines (finding x such +// that A * x = b). The condition number of A indicates the accuracy of +// the computed solution. A Condition error will be returned if the condition +// number of A is sufficiently large. If A is exactly singular to working precision, +// Condition == ∞, and the solve algorithm may have completed early. If Condition +// is large and finite the solve algorithm will be performed, but the computed +// solution may be inaccurate. Due to the nature of finite precision arithmetic, +// the value of Condition is only an approximate test of singularity. +type Condition float64 + +func (c Condition) Error() string { + return fmt.Sprintf("matrix singular or near-singular with condition number %.4e", c) +} + +// ConditionTolerance is the tolerance limit of the condition number. If the +// condition number is above this value, the matrix is considered singular. +const ConditionTolerance = 1e16 + +const ( + // CondNorm is the matrix norm used for computing the condition number by routines + // in the matrix packages. + CondNorm = lapack.MaxRowSum + + // CondNormTrans is the norm used to compute on Aᵀ to get the same result as + // computing CondNorm on A. + CondNormTrans = lapack.MaxColumnSum +) + +const stackTraceBufferSize = 1 << 20 + +// Maybe will recover a panic with a type mat.Error from fn, and return this error +// as the Err field of an ErrorStack. The stack trace for the panicking function will be +// recovered and placed in the StackTrace field. Any other error is re-panicked. +func Maybe(fn func()) (err error) { + defer func() { + if r := recover(); r != nil { + if e, ok := r.(Error); ok { + if e.string == "" { + panic("mat: invalid error") + } + buf := make([]byte, stackTraceBufferSize) + n := runtime.Stack(buf, false) + err = ErrorStack{Err: e, StackTrace: string(buf[:n])} + return + } + panic(r) + } + }() + fn() + return +} + +// MaybeFloat will recover a panic with a type mat.Error from fn, and return this error +// as the Err field of an ErrorStack. The stack trace for the panicking function will be +// recovered and placed in the StackTrace field. Any other error is re-panicked. +func MaybeFloat(fn func() float64) (f float64, err error) { + defer func() { + if r := recover(); r != nil { + if e, ok := r.(Error); ok { + if e.string == "" { + panic("mat: invalid error") + } + buf := make([]byte, stackTraceBufferSize) + n := runtime.Stack(buf, false) + err = ErrorStack{Err: e, StackTrace: string(buf[:n])} + return + } + panic(r) + } + }() + return fn(), nil +} + +// MaybeComplex will recover a panic with a type mat.Error from fn, and return this error +// as the Err field of an ErrorStack. The stack trace for the panicking function will be +// recovered and placed in the StackTrace field. Any other error is re-panicked. +func MaybeComplex(fn func() complex128) (f complex128, err error) { + defer func() { + if r := recover(); r != nil { + if e, ok := r.(Error); ok { + if e.string == "" { + panic("mat: invalid error") + } + buf := make([]byte, stackTraceBufferSize) + n := runtime.Stack(buf, false) + err = ErrorStack{Err: e, StackTrace: string(buf[:n])} + return + } + panic(r) + } + }() + return fn(), nil +} + +// Error represents matrix handling errors. These errors can be recovered by Maybe wrappers. 
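+//
+// For example (a sketch; a and b are assumed, incompatibly sized matrices),
+// a panic from inside the package is converted into an error:
+//
+//	err := mat.Maybe(func() {
+//		var c mat.Dense
+//		c.Mul(a, b) // panics with ErrShape
+//	})
+//	// err now wraps the recovered mat.Error and a stack trace.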
+type Error struct{ string } + +func (err Error) Error() string { return err.string } + +var ( + ErrNegativeDimension = Error{"mat: negative dimension"} + ErrIndexOutOfRange = Error{"mat: index out of range"} + ErrReuseNonEmpty = Error{"mat: reuse of non-empty matrix"} + ErrRowAccess = Error{"mat: row index out of range"} + ErrColAccess = Error{"mat: column index out of range"} + ErrVectorAccess = Error{"mat: vector index out of range"} + ErrZeroLength = Error{"mat: zero length in matrix dimension"} + ErrRowLength = Error{"mat: row length mismatch"} + ErrColLength = Error{"mat: col length mismatch"} + ErrSquare = Error{"mat: expect square matrix"} + ErrNormOrder = Error{"mat: invalid norm order for matrix"} + ErrSingular = Error{"mat: matrix is singular"} + ErrShape = Error{"mat: dimension mismatch"} + ErrIllegalStride = Error{"mat: illegal stride"} + ErrPivot = Error{"mat: malformed pivot list"} + ErrTriangle = Error{"mat: triangular storage mismatch"} + ErrTriangleSet = Error{"mat: triangular set out of bounds"} + ErrBandwidth = Error{"mat: bandwidth out of range"} + ErrBandSet = Error{"mat: band set out of bounds"} + ErrDiagSet = Error{"mat: diagonal set out of bounds"} + ErrSliceLengthMismatch = Error{"mat: input slice length mismatch"} + ErrNotPSD = Error{"mat: input not positive symmetric definite"} + ErrFailedEigen = Error{"mat: eigendecomposition not successful"} +) + +// ErrorStack represents matrix handling errors that have been recovered by Maybe wrappers. +type ErrorStack struct { + Err error + + // StackTrace is the stack trace + // recovered by Maybe, MaybeFloat + // or MaybeComplex. + StackTrace string +} + +func (err ErrorStack) Error() string { return err.Err.Error() } + +const badCap = "mat: bad capacity" diff --git a/vendor/gonum.org/v1/gonum/mat/format.go b/vendor/gonum.org/v1/gonum/mat/format.go new file mode 100644 index 0000000000..c239ddd363 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/format.go @@ -0,0 +1,516 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "fmt" + "strconv" + "strings" +) + +// Formatted returns a fmt.Formatter for the matrix m using the given options. +func Formatted(m Matrix, options ...FormatOption) fmt.Formatter { + f := formatter{ + matrix: m, + dot: '.', + } + for _, o := range options { + o(&f) + } + return f +} + +type formatter struct { + matrix Matrix + prefix string + margin int + dot byte + squeeze bool + + format func(m Matrix, prefix string, margin int, dot byte, squeeze bool, fs fmt.State, c rune) +} + +// FormatOption is a functional option for matrix formatting. +type FormatOption func(*formatter) + +// Prefix sets the formatted prefix to the string p. Prefix is a string that is prepended to +// each line of output after the first line. +func Prefix(p string) FormatOption { + return func(f *formatter) { f.prefix = p } +} + +// Excerpt sets the maximum number of rows and columns to print at the margins of the matrix +// to m. If m is zero or less all elements are printed. +func Excerpt(m int) FormatOption { + return func(f *formatter) { f.margin = m } +} + +// DotByte sets the dot character to b. The dot character is used to replace zero elements +// if the result is printed with the fmt ' ' verb flag. Without a DotByte option, the default +// dot character is '.'. 
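+//
+// For example (a sketch, with m an assumed Matrix), printing with the ' '
+// verb flag replaces zero elements with '*':
+//
+//	fmt.Printf("% v\n", mat.Formatted(m, mat.DotByte('*')))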
+func DotByte(b byte) FormatOption { + return func(f *formatter) { f.dot = b } +} + +// Squeeze sets the printing behavior to minimise column width for each individual column. +func Squeeze() FormatOption { + return func(f *formatter) { f.squeeze = true } +} + +// FormatMATLAB sets the printing behavior to output MATLAB syntax. If MATLAB syntax is +// specified, the ' ' verb flag and Excerpt option are ignored. If the alternative syntax +// verb flag, '#' is used the matrix is formatted in rows and columns. +func FormatMATLAB() FormatOption { + return func(f *formatter) { f.format = formatMATLAB } +} + +// FormatPython sets the printing behavior to output Python syntax. If Python syntax is +// specified, the ' ' verb flag and Excerpt option are ignored. If the alternative syntax +// verb flag, '#' is used the matrix is formatted in rows and columns. +func FormatPython() FormatOption { + return func(f *formatter) { f.format = formatPython } +} + +// Format satisfies the fmt.Formatter interface. +func (f formatter) Format(fs fmt.State, c rune) { + if c == 'v' && fs.Flag('#') && f.format == nil { + fmt.Fprintf(fs, "%#v", f.matrix) + return + } + if f.format == nil { + f.format = format + } + f.format(f.matrix, f.prefix, f.margin, f.dot, f.squeeze, fs, c) +} + +// format prints a pretty representation of m to the fs io.Writer. The format character c +// specifies the numerical representation of elements; valid values are those for float64 +// specified in the fmt package, with their associated flags. In addition to this, a space +// preceding a verb indicates that zero values should be represented by the dot character. +// The printed range of the matrix can be limited by specifying a positive value for margin; +// If margin is greater than zero, only the first and last margin rows/columns of the matrix +// are output. If squeeze is true, column widths are determined on a per-column basis. +// +// format will not provide Go syntax output. +func format(m Matrix, prefix string, margin int, dot byte, squeeze bool, fs fmt.State, c rune) { + rows, cols := m.Dims() + + var printed int + if margin <= 0 { + printed = rows + if cols > printed { + printed = cols + } + } else { + printed = margin + } + + prec, pOk := fs.Precision() + if !pOk { + prec = -1 + } + + var ( + maxWidth int + widths widther + buf, pad []byte + ) + if squeeze { + widths = make(columnWidth, cols) + } else { + widths = new(uniformWidth) + } + switch c { + case 'v', 'e', 'E', 'f', 'F', 'g', 'G': + if c == 'v' { + buf, maxWidth = maxCellWidth(m, 'g', printed, prec, widths) + } else { + buf, maxWidth = maxCellWidth(m, c, printed, prec, widths) + } + default: + fmt.Fprintf(fs, "%%!%c(%T=Dims(%d, %d))", c, m, rows, cols) + return + } + width, _ := fs.Width() + width = max(width, maxWidth) + pad = make([]byte, max(width, 2)) + for i := range pad { + pad[i] = ' ' + } + + first := true + if rows > 2*printed || cols > 2*printed { + first = false + fmt.Fprintf(fs, "Dims(%d, %d)\n", rows, cols) + } + + skipZero := fs.Flag(' ') + for i := 0; i < rows; i++ { + if !first { + fmt.Fprint(fs, prefix) + } + first = false + var el string + switch { + case rows == 1: + fmt.Fprint(fs, "[") + el = "]" + case i == 0: + fmt.Fprint(fs, "⎡") + el = "⎤\n" + case i < rows-1: + fmt.Fprint(fs, "⎢") + el = "⎥\n" + default: + fmt.Fprint(fs, "⎣") + el = "⎦" + } + + for j := 0; j < cols; j++ { + if j >= printed && j < cols-printed { + j = cols - printed - 1 + if i == 0 || i == rows-1 { + fmt.Fprint(fs, "... ... 
") + } else { + fmt.Fprint(fs, " ") + } + continue + } + + v := m.At(i, j) + if v == 0 && skipZero { + buf = buf[:1] + buf[0] = dot + } else { + if c == 'v' { + buf = strconv.AppendFloat(buf[:0], v, 'g', prec, 64) + } else { + buf = strconv.AppendFloat(buf[:0], v, byte(c), prec, 64) + } + } + if fs.Flag('-') { + fs.Write(buf) + fs.Write(pad[:widths.width(j)-len(buf)]) + } else { + fs.Write(pad[:widths.width(j)-len(buf)]) + fs.Write(buf) + } + + if j < cols-1 { + fs.Write(pad[:2]) + } + } + + fmt.Fprint(fs, el) + + if i >= printed-1 && i < rows-printed && 2*printed < rows { + i = rows - printed - 1 + fmt.Fprintf(fs, "%s .\n%[1]s .\n%[1]s .\n", prefix) + continue + } + } +} + +// formatMATLAB prints a MATLAB representation of m to the fs io.Writer. The format character c +// specifies the numerical representation of elements; valid values are those for float64 +// specified in the fmt package, with their associated flags. +// The printed range of the matrix can be limited by specifying a positive value for margin; +// If squeeze is true, column widths are determined on a per-column basis. +// +// formatMATLAB will not provide Go syntax output. +func formatMATLAB(m Matrix, prefix string, _ int, _ byte, squeeze bool, fs fmt.State, c rune) { + rows, cols := m.Dims() + + prec, pOk := fs.Precision() + width, _ := fs.Width() + if !fs.Flag('#') { + switch c { + case 'v', 'e', 'E', 'f', 'F', 'g', 'G': + default: + fmt.Fprintf(fs, "%%!%c(%T=Dims(%d, %d))", c, m, rows, cols) + return + } + format := fmtString(fs, c, prec, width) + fs.Write([]byte{'['}) + for i := 0; i < rows; i++ { + if i != 0 { + fs.Write([]byte("; ")) + } + for j := 0; j < cols; j++ { + if j != 0 { + fs.Write([]byte{' '}) + } + fmt.Fprintf(fs, format, m.At(i, j)) + } + } + fs.Write([]byte{']'}) + return + } + + if !pOk { + prec = -1 + } + + printed := rows + if cols > printed { + printed = cols + } + + var ( + maxWidth int + widths widther + buf, pad []byte + ) + if squeeze { + widths = make(columnWidth, cols) + } else { + widths = new(uniformWidth) + } + switch c { + case 'v', 'e', 'E', 'f', 'F', 'g', 'G': + if c == 'v' { + buf, maxWidth = maxCellWidth(m, 'g', printed, prec, widths) + } else { + buf, maxWidth = maxCellWidth(m, c, printed, prec, widths) + } + default: + fmt.Fprintf(fs, "%%!%c(%T=Dims(%d, %d))", c, m, rows, cols) + return + } + width = max(width, maxWidth) + pad = make([]byte, max(width, 1)) + for i := range pad { + pad[i] = ' ' + } + + for i := 0; i < rows; i++ { + var el string + switch { + case rows == 1: + fmt.Fprint(fs, "[") + el = "]" + case i == 0: + fmt.Fprint(fs, "[\n"+prefix+" ") + el = "\n" + case i < rows-1: + fmt.Fprint(fs, prefix+" ") + el = "\n" + default: + fmt.Fprint(fs, prefix+" ") + el = "\n" + prefix + "]" + } + + for j := 0; j < cols; j++ { + v := m.At(i, j) + if c == 'v' { + buf = strconv.AppendFloat(buf[:0], v, 'g', prec, 64) + } else { + buf = strconv.AppendFloat(buf[:0], v, byte(c), prec, 64) + } + if fs.Flag('-') { + fs.Write(buf) + fs.Write(pad[:widths.width(j)-len(buf)]) + } else { + fs.Write(pad[:widths.width(j)-len(buf)]) + fs.Write(buf) + } + + if j < cols-1 { + fs.Write(pad[:1]) + } + } + + fmt.Fprint(fs, el) + } +} + +// formatPython prints a Python representation of m to the fs io.Writer. The format character c +// specifies the numerical representation of elements; valid values are those for float64 +// specified in the fmt package, with their associated flags. 
+// The printed range of the matrix can be limited by specifying a positive value for margin; +// If squeeze is true, column widths are determined on a per-column basis. +// +// formatPython will not provide Go syntax output. +func formatPython(m Matrix, prefix string, _ int, _ byte, squeeze bool, fs fmt.State, c rune) { + rows, cols := m.Dims() + + prec, pOk := fs.Precision() + width, _ := fs.Width() + if !fs.Flag('#') { + switch c { + case 'v', 'e', 'E', 'f', 'F', 'g', 'G': + default: + fmt.Fprintf(fs, "%%!%c(%T=Dims(%d, %d))", c, m, rows, cols) + return + } + format := fmtString(fs, c, prec, width) + fs.Write([]byte{'['}) + if rows > 1 { + fs.Write([]byte{'['}) + } + for i := 0; i < rows; i++ { + if i != 0 { + fs.Write([]byte("], [")) + } + for j := 0; j < cols; j++ { + if j != 0 { + fs.Write([]byte(", ")) + } + fmt.Fprintf(fs, format, m.At(i, j)) + } + } + if rows > 1 { + fs.Write([]byte{']'}) + } + fs.Write([]byte{']'}) + return + } + + if !pOk { + prec = -1 + } + + printed := rows + if cols > printed { + printed = cols + } + + var ( + maxWidth int + widths widther + buf, pad []byte + ) + if squeeze { + widths = make(columnWidth, cols) + } else { + widths = new(uniformWidth) + } + switch c { + case 'v', 'e', 'E', 'f', 'F', 'g', 'G': + if c == 'v' { + buf, maxWidth = maxCellWidth(m, 'g', printed, prec, widths) + } else { + buf, maxWidth = maxCellWidth(m, c, printed, prec, widths) + } + default: + fmt.Fprintf(fs, "%%!%c(%T=Dims(%d, %d))", c, m, rows, cols) + return + } + width = max(width, maxWidth) + pad = make([]byte, max(width, 1)) + for i := range pad { + pad[i] = ' ' + } + + for i := 0; i < rows; i++ { + if i != 0 { + fmt.Fprint(fs, prefix) + } + var el string + switch { + case rows == 1: + fmt.Fprint(fs, "[") + el = "]" + case i == 0: + fmt.Fprint(fs, "[[") + el = "],\n" + case i < rows-1: + fmt.Fprint(fs, " [") + el = "],\n" + default: + fmt.Fprint(fs, " [") + el = "]]" + } + + for j := 0; j < cols; j++ { + v := m.At(i, j) + if c == 'v' { + buf = strconv.AppendFloat(buf[:0], v, 'g', prec, 64) + } else { + buf = strconv.AppendFloat(buf[:0], v, byte(c), prec, 64) + } + if fs.Flag('-') { + fs.Write(buf) + fs.Write(pad[:widths.width(j)-len(buf)]) + } else { + fs.Write(pad[:widths.width(j)-len(buf)]) + fs.Write(buf) + } + + if j < cols-1 { + fs.Write([]byte{','}) + fs.Write(pad[:1]) + } + } + + fmt.Fprint(fs, el) + } +} + +// This is horrible, but it's what we have. 
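+// fmtString rebuilds a Printf-style format string (for example "%+6.3f")
+// from the flags, width and precision recorded in fs, finishing with the
+// verb c.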
+func fmtString(fs fmt.State, c rune, prec, width int) string { + var b strings.Builder + b.WriteByte('%') + for _, f := range "0+- " { + if fs.Flag(int(f)) { + b.WriteByte(byte(f)) + } + } + if width >= 0 { + fmt.Fprint(&b, width) + } + if prec >= 0 { + b.WriteByte('.') + if prec > 0 { + fmt.Fprint(&b, prec) + } + } + b.WriteRune(c) + return b.String() +} + +func maxCellWidth(m Matrix, c rune, printed, prec int, w widther) ([]byte, int) { + var ( + buf = make([]byte, 0, 64) + rows, cols = m.Dims() + max int + ) + for i := 0; i < rows; i++ { + if i >= printed-1 && i < rows-printed && 2*printed < rows { + i = rows - printed - 1 + continue + } + for j := 0; j < cols; j++ { + if j >= printed && j < cols-printed { + continue + } + + buf = strconv.AppendFloat(buf, m.At(i, j), byte(c), prec, 64) + if len(buf) > max { + max = len(buf) + } + if len(buf) > w.width(j) { + w.setWidth(j, len(buf)) + } + buf = buf[:0] + } + } + return buf, max +} + +type widther interface { + width(i int) int + setWidth(i, w int) +} + +type uniformWidth int + +func (u *uniformWidth) width(_ int) int { return int(*u) } +func (u *uniformWidth) setWidth(_, w int) { *u = uniformWidth(w) } + +type columnWidth []int + +func (c columnWidth) width(i int) int { return c[i] } +func (c columnWidth) setWidth(i, w int) { c[i] = w } diff --git a/vendor/gonum.org/v1/gonum/mat/gsvd.go b/vendor/gonum.org/v1/gonum/mat/gsvd.go new file mode 100644 index 0000000000..02286207cf --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/gsvd.go @@ -0,0 +1,436 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/lapack" + "gonum.org/v1/gonum/lapack/lapack64" +) + +// GSVDKind specifies the treatment of singular vectors during a GSVD +// factorization. +type GSVDKind int + +const ( + // GSVDNone specifies that no singular vectors should be computed during + // the decomposition. + GSVDNone GSVDKind = 0 + + // GSVDU specifies that the U singular vectors should be computed during + // the decomposition. + GSVDU GSVDKind = 1 << iota + // GSVDV specifies that the V singular vectors should be computed during + // the decomposition. + GSVDV + // GSVDQ specifies that the Q singular vectors should be computed during + // the decomposition. + GSVDQ + + // GSVDAll is a convenience value for computing all of the singular vectors. + GSVDAll = GSVDU | GSVDV | GSVDQ +) + +// GSVD is a type for creating and using the Generalized Singular Value Decomposition +// (GSVD) of a matrix. +// +// The factorization is a linear transformation of the data sets from the given +// variable×sample spaces to reduced and diagonalized "eigenvariable"×"eigensample" +// spaces. +type GSVD struct { + kind GSVDKind + + r, p, c, k, l int + s1, s2 []float64 + a, b, u, v, q blas64.General + + work []float64 + iwork []int +} + +// succFact returns whether the receiver contains a successful factorization. +func (gsvd *GSVD) succFact() bool { + return gsvd.r != 0 +} + +// Factorize computes the generalized singular value decomposition (GSVD) of the input +// the r×c matrix A and the p×c matrix B. The singular values of A and B are computed +// in all cases, while the singular vectors are optionally computed depending on the +// input kind. 
+//
+// The full singular value decomposition (kind == GSVDAll) deconstructs A and B as
+//
+//	A = U * Σ₁ * [ 0 R ] * Qᵀ
+//
+//	B = V * Σ₂ * [ 0 R ] * Qᵀ
+//
+// where Σ₁ and Σ₂ are r×(k+l) and p×(k+l) diagonal matrices of singular values, and
+// U, V and Q are r×r, p×p and c×c orthogonal matrices of singular vectors. k+l is the
+// effective numerical rank of the matrix [ Aᵀ Bᵀ ]ᵀ.
+//
+// It is frequently not necessary to compute the full GSVD. Computation time and
+// storage costs can be reduced using the appropriate kind. Either only the singular
+// values can be computed (kind == GSVDNone), or in conjunction with specific singular
+// vectors (kind bit set according to GSVDU, GSVDV and GSVDQ).
+//
+// Factorize returns whether the decomposition succeeded. If the decomposition
+// failed, routines that require a successful factorization will panic.
+func (gsvd *GSVD) Factorize(a, b Matrix, kind GSVDKind) (ok bool) {
+	// kill the previous decomposition
+	gsvd.r = 0
+	gsvd.kind = 0
+
+	r, c := a.Dims()
+	gsvd.r, gsvd.c = r, c
+	p, c := b.Dims()
+	gsvd.p = p
+	if gsvd.c != c {
+		panic(ErrShape)
+	}
+	var jobU, jobV, jobQ lapack.GSVDJob
+	switch {
+	default:
+		panic("gsvd: bad input kind")
+	case kind == GSVDNone:
+		jobU = lapack.GSVDNone
+		jobV = lapack.GSVDNone
+		jobQ = lapack.GSVDNone
+	case GSVDAll&kind != 0:
+		if GSVDU&kind != 0 {
+			jobU = lapack.GSVDU
+			gsvd.u = blas64.General{
+				Rows:   r,
+				Cols:   r,
+				Stride: r,
+				Data:   use(gsvd.u.Data, r*r),
+			}
+		}
+		if GSVDV&kind != 0 {
+			jobV = lapack.GSVDV
+			gsvd.v = blas64.General{
+				Rows:   p,
+				Cols:   p,
+				Stride: p,
+				Data:   use(gsvd.v.Data, p*p),
+			}
+		}
+		if GSVDQ&kind != 0 {
+			jobQ = lapack.GSVDQ
+			gsvd.q = blas64.General{
+				Rows:   c,
+				Cols:   c,
+				Stride: c,
+				Data:   use(gsvd.q.Data, c*c),
+			}
+		}
+	}
+
+	// A and B are destroyed on call, so copy the matrices.
+	aCopy := DenseCopyOf(a)
+	bCopy := DenseCopyOf(b)
+
+	gsvd.s1 = use(gsvd.s1, c)
+	gsvd.s2 = use(gsvd.s2, c)
+
+	gsvd.iwork = useInt(gsvd.iwork, c)
+
+	gsvd.work = use(gsvd.work, 1)
+	lapack64.Ggsvd3(jobU, jobV, jobQ, aCopy.mat, bCopy.mat, gsvd.s1, gsvd.s2, gsvd.u, gsvd.v, gsvd.q, gsvd.work, -1, gsvd.iwork)
+	gsvd.work = use(gsvd.work, int(gsvd.work[0]))
+	gsvd.k, gsvd.l, ok = lapack64.Ggsvd3(jobU, jobV, jobQ, aCopy.mat, bCopy.mat, gsvd.s1, gsvd.s2, gsvd.u, gsvd.v, gsvd.q, gsvd.work, len(gsvd.work), gsvd.iwork)
+	if ok {
+		gsvd.a = aCopy.mat
+		gsvd.b = bCopy.mat
+		gsvd.kind = kind
+	}
+	return ok
+}
+
+// Kind returns the GSVDKind of the decomposition. If no decomposition has been
+// computed, Kind returns -1.
+func (gsvd *GSVD) Kind() GSVDKind {
+	if !gsvd.succFact() {
+		return -1
+	}
+	return gsvd.kind
+}
+
+// Rank returns the k and l terms of the rank of [ Aᵀ Bᵀ ]ᵀ.
+func (gsvd *GSVD) Rank() (k, l int) {
+	return gsvd.k, gsvd.l
+}
+
+// GeneralizedValues returns the generalized singular values of the factorized matrices.
+// If the input slice is non-nil, the values will be stored in-place into the slice.
+// In this case, the slice must have length min(r,c)-k, and GeneralizedValues will
+// panic with ErrSliceLengthMismatch otherwise. If the input slice is nil,
+// a new slice of the appropriate length will be allocated and returned.
+//
+// GeneralizedValues will panic if the receiver does not contain a successful factorization.
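+//
+// A minimal sketch (a and b are assumed matrices with equal column counts):
+//
+//	var gsvd mat.GSVD
+//	if ok := gsvd.Factorize(a, b, mat.GSVDNone); ok {
+//		gv := gsvd.GeneralizedValues(nil)
+//		_ = gv
+//	}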
+func (gsvd *GSVD) GeneralizedValues(v []float64) []float64 {
+	if !gsvd.succFact() {
+		panic(badFact)
+	}
+	r := gsvd.r
+	c := gsvd.c
+	k := gsvd.k
+	d := min(r, c)
+	if v == nil {
+		v = make([]float64, d-k)
+	}
+	if len(v) != d-k {
+		panic(ErrSliceLengthMismatch)
+	}
+	floats.DivTo(v, gsvd.s1[k:d], gsvd.s2[k:d])
+	return v
+}
+
+// ValuesA returns the singular values of the factorized A matrix.
+// If the input slice is non-nil, the values will be stored in-place into the slice.
+// In this case, the slice must have length min(r,c)-k, and ValuesA will panic with
+// ErrSliceLengthMismatch otherwise. If the input slice is nil,
+// a new slice of the appropriate length will be allocated and returned.
+//
+// ValuesA will panic if the receiver does not contain a successful factorization.
+func (gsvd *GSVD) ValuesA(s []float64) []float64 {
+	if !gsvd.succFact() {
+		panic(badFact)
+	}
+	r := gsvd.r
+	c := gsvd.c
+	k := gsvd.k
+	d := min(r, c)
+	if s == nil {
+		s = make([]float64, d-k)
+	}
+	if len(s) != d-k {
+		panic(ErrSliceLengthMismatch)
+	}
+	copy(s, gsvd.s1[k:min(r, c)])
+	return s
+}
+
+// ValuesB returns the singular values of the factorized B matrix.
+// If the input slice is non-nil, the values will be stored in-place into the slice.
+// In this case, the slice must have length min(r,c)-k, and ValuesB will panic with
+// ErrSliceLengthMismatch otherwise. If the input slice is nil,
+// a new slice of the appropriate length will be allocated and returned.
+//
+// ValuesB will panic if the receiver does not contain a successful factorization.
+func (gsvd *GSVD) ValuesB(s []float64) []float64 {
+	if !gsvd.succFact() {
+		panic(badFact)
+	}
+	r := gsvd.r
+	c := gsvd.c
+	k := gsvd.k
+	d := min(r, c)
+	if s == nil {
+		s = make([]float64, d-k)
+	}
+	if len(s) != d-k {
+		panic(ErrSliceLengthMismatch)
+	}
+	copy(s, gsvd.s2[k:d])
+	return s
+}
+
+// ZeroRTo extracts the matrix [ 0 R ] from the singular value decomposition,
+// storing the result into dst. [ 0 R ] is of size (k+l)×c.
+//
+// If dst is empty, ZeroRTo will resize dst to be (k+l)×c. When dst is
+// non-empty, ZeroRTo will panic if dst is not (k+l)×c. ZeroRTo will also panic
+// if the receiver does not contain a successful factorization.
+func (gsvd *GSVD) ZeroRTo(dst *Dense) {
+	if !gsvd.succFact() {
+		panic(badFact)
+	}
+	r := gsvd.r
+	c := gsvd.c
+	k := gsvd.k
+	l := gsvd.l
+	h := min(k+l, r)
+	if dst.IsEmpty() {
+		dst.ReuseAs(k+l, c)
+	} else {
+		r2, c2 := dst.Dims()
+		if r2 != k+l || c != c2 {
+			panic(ErrShape)
+		}
+		dst.Zero()
+	}
+	a := Dense{
+		mat:     gsvd.a,
+		capRows: r,
+		capCols: c,
+	}
+	dst.slice(0, h, c-k-l, c).Copy(a.Slice(0, h, c-k-l, c))
+	if r < k+l {
+		b := Dense{
+			mat:     gsvd.b,
+			capRows: gsvd.p,
+			capCols: c,
+		}
+		dst.slice(r, k+l, c+r-k-l, c).Copy(b.Slice(r-k, l, c+r-k-l, c))
+	}
+}
+
+// SigmaATo extracts the matrix Σ₁ from the singular value decomposition, storing
+// the result into dst. Σ₁ is size r×(k+l).
+//
+// If dst is empty, SigmaATo will resize dst to be r×(k+l). When dst is
+// non-empty, SigmaATo will panic if dst is not r×(k+l). SigmaATo will also
+// panic if the receiver does not contain a successful factorization.
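+//
+// A reconstruction sketch, assuming the factorization was computed with
+// GSVDAll so that all extraction methods are available; the product below
+// recovers A up to floating-point rounding:
+//
+//	var u, s1, zr, q mat.Dense
+//	gsvd.UTo(&u)
+//	gsvd.SigmaATo(&s1)
+//	gsvd.ZeroRTo(&zr)
+//	gsvd.QTo(&q)
+//	var t1, t2, aRec mat.Dense
+//	t1.Mul(&u, &s1)      // U * Σ₁
+//	t2.Mul(&t1, &zr)     // U * Σ₁ * [ 0 R ]
+//	aRec.Mul(&t2, q.T()) // ≈ A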
+func (gsvd *GSVD) SigmaATo(dst *Dense) {
+	if !gsvd.succFact() {
+		panic(badFact)
+	}
+	r := gsvd.r
+	k := gsvd.k
+	l := gsvd.l
+	if dst.IsEmpty() {
+		dst.ReuseAs(r, k+l)
+	} else {
+		r2, c := dst.Dims()
+		if r2 != r || c != k+l {
+			panic(ErrShape)
+		}
+		dst.Zero()
+	}
+	for i := 0; i < k; i++ {
+		dst.set(i, i, 1)
+	}
+	for i := k; i < min(r, k+l); i++ {
+		dst.set(i, i, gsvd.s1[i])
+	}
+}
+
+// SigmaBTo extracts the matrix Σ₂ from the singular value decomposition, storing
+// the result into dst. Σ₂ is size p×(k+l).
+//
+// If dst is empty, SigmaBTo will resize dst to be p×(k+l). When dst is
+// non-empty, SigmaBTo will panic if dst is not p×(k+l). SigmaBTo will also
+// panic if the receiver does not contain a successful factorization.
+func (gsvd *GSVD) SigmaBTo(dst *Dense) {
+	if !gsvd.succFact() {
+		panic(badFact)
+	}
+	r := gsvd.r
+	p := gsvd.p
+	k := gsvd.k
+	l := gsvd.l
+	if dst.IsEmpty() {
+		dst.ReuseAs(p, k+l)
+	} else {
+		r, c := dst.Dims()
+		if r != p || c != k+l {
+			panic(ErrShape)
+		}
+		dst.Zero()
+	}
+	for i := 0; i < min(l, r-k); i++ {
+		dst.set(i, i+k, gsvd.s2[k+i])
+	}
+	for i := r - k; i < l; i++ {
+		dst.set(i, i+k, 1)
+	}
+}
+
+// UTo extracts the matrix U from the singular value decomposition, storing
+// the result into dst. U is size r×r.
+//
+// If dst is empty, UTo will resize dst to be r×r. When dst is
+// non-empty, UTo will panic if dst is not r×r. UTo will also
+// panic if the receiver does not contain a successful factorization.
+func (gsvd *GSVD) UTo(dst *Dense) {
+	if !gsvd.succFact() {
+		panic(badFact)
+	}
+	if gsvd.kind&GSVDU == 0 {
+		panic("mat: improper GSVD kind")
+	}
+	r := gsvd.u.Rows
+	c := gsvd.u.Cols
+	if dst.IsEmpty() {
+		dst.ReuseAs(r, c)
+	} else {
+		r2, c2 := dst.Dims()
+		if r != r2 || c != c2 {
+			panic(ErrShape)
+		}
+	}
+
+	tmp := &Dense{
+		mat:     gsvd.u,
+		capRows: r,
+		capCols: c,
+	}
+	dst.Copy(tmp)
+}
+
+// VTo extracts the matrix V from the singular value decomposition, storing
+// the result into dst. V is size p×p.
+//
+// If dst is empty, VTo will resize dst to be p×p. When dst is
+// non-empty, VTo will panic if dst is not p×p. VTo will also
+// panic if the receiver does not contain a successful factorization.
+func (gsvd *GSVD) VTo(dst *Dense) {
+	if !gsvd.succFact() {
+		panic(badFact)
+	}
+	if gsvd.kind&GSVDV == 0 {
+		panic("mat: improper GSVD kind")
+	}
+	r := gsvd.v.Rows
+	c := gsvd.v.Cols
+	if dst.IsEmpty() {
+		dst.ReuseAs(r, c)
+	} else {
+		r2, c2 := dst.Dims()
+		if r != r2 || c != c2 {
+			panic(ErrShape)
+		}
+	}
+
+	tmp := &Dense{
+		mat:     gsvd.v,
+		capRows: r,
+		capCols: c,
+	}
+	dst.Copy(tmp)
+}
+
+// QTo extracts the matrix Q from the singular value decomposition, storing
+// the result into dst. Q is size c×c.
+//
+// If dst is empty, QTo will resize dst to be c×c. When dst is
+// non-empty, QTo will panic if dst is not c×c. QTo will also
+// panic if the receiver does not contain a successful factorization.
+func (gsvd *GSVD) QTo(dst *Dense) {
+	if !gsvd.succFact() {
+		panic(badFact)
+	}
+	if gsvd.kind&GSVDQ == 0 {
+		panic("mat: improper GSVD kind")
+	}
+	r := gsvd.q.Rows
+	c := gsvd.q.Cols
+	if dst.IsEmpty() {
+		dst.ReuseAs(r, c)
+	} else {
+		r2, c2 := dst.Dims()
+		if r != r2 || c != c2 {
+			panic(ErrShape)
+		}
+	}
+
+	tmp := &Dense{
+		mat:     gsvd.q,
+		capRows: r,
+		capCols: c,
+	}
+	dst.Copy(tmp)
+}
diff --git a/vendor/gonum.org/v1/gonum/mat/hogsvd.go b/vendor/gonum.org/v1/gonum/mat/hogsvd.go
new file mode 100644
index 0000000000..40a03315b9
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mat/hogsvd.go
@@ -0,0 +1,239 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mat
+
+import (
+	"errors"
+
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// HOGSVD is a type for creating and using the Higher Order Generalized Singular Value
+// Decomposition (HOGSVD) of a set of matrices.
+//
+// The factorization is a linear transformation of the data sets from the given
+// variable×sample spaces to reduced and diagonalized "eigenvariable"×"eigensample"
+// spaces.
+type HOGSVD struct {
+	n int
+	v *Dense
+	b []Dense
+
+	err error
+}
+
+// succFact returns whether the receiver contains a successful factorization.
+func (gsvd *HOGSVD) succFact() bool {
+	return gsvd.n != 0
+}
+
+// Factorize computes the higher order generalized singular value decomposition (HOGSVD)
+// of the n input r_i×c column tall matrices in m. HOGSVD extends the GSVD case from 2 to n
+// input matrices.
+//
+//	M_0 = U_0 * Σ_0 * Vᵀ
+//	M_1 = U_1 * Σ_1 * Vᵀ
+//	.
+//	.
+//	.
+//	M_{n-1} = U_{n-1} * Σ_{n-1} * Vᵀ
+//
+// where U_i are r_i×c matrices of singular vectors, Σ_i are c×c matrices of singular
+// values, and V is a c×c matrix of singular vectors.
+//
+// Factorize returns whether the decomposition succeeded. If the decomposition
+// failed, routines that require a successful factorization will panic.
+func (gsvd *HOGSVD) Factorize(m ...Matrix) (ok bool) {
+	// Factorize performs the HOGSVD factorisation
+	// essentially as described by Ponnapalli et al.
+	// https://doi.org/10.1371/journal.pone.0028072
+
+	if len(m) < 2 {
+		panic("hogsvd: too few matrices")
+	}
+	gsvd.n = 0
+
+	r, c := m[0].Dims()
+	a := make([]Cholesky, len(m))
+	var ts SymDense
+	for i, d := range m {
+		rd, cd := d.Dims()
+		if rd < cd {
+			gsvd.err = ErrShape
+			return false
+		}
+		if rd > r {
+			r = rd
+		}
+		if cd != c {
+			panic(ErrShape)
+		}
+		ts.Reset()
+		ts.SymOuterK(1, d.T())
+		ok = a[i].Factorize(&ts)
+		if !ok {
+			gsvd.err = errors.New("hogsvd: cholesky decomposition failed")
+			return false
+		}
+	}
+
+	s := getDenseWorkspace(c, c, true)
+	defer putDenseWorkspace(s)
+	sij := getDenseWorkspace(c, c, false)
+	defer putDenseWorkspace(sij)
+	for i, ai := range a {
+		for _, aj := range a[i+1:] {
+			gsvd.err = ai.SolveCholTo(sij, &aj)
+			if gsvd.err != nil {
+				return false
+			}
+			s.Add(s, sij)
+
+			gsvd.err = aj.SolveCholTo(sij, &ai)
+			if gsvd.err != nil {
+				return false
+			}
+			s.Add(s, sij)
+		}
+	}
+	s.Scale(1/float64(len(m)*(len(m)-1)), s)
+
+	var eig Eigen
+	ok = eig.Factorize(s.T(), EigenRight)
+	if !ok {
+		gsvd.err = errors.New("hogsvd: eigen decomposition failed")
+		return false
+	}
+	var vc CDense
+	eig.VectorsTo(&vc)
+	// vc is guaranteed to have real eigenvalues.
+ rc, cc := vc.Dims() + v := NewDense(rc, cc, nil) + for i := 0; i < rc; i++ { + for j := 0; j < cc; j++ { + a := vc.At(i, j) + v.set(i, j, real(a)) + } + } + // Rescale the columns of v by their Frobenius norms. + // Work done in cv is reflected in v. + var cv VecDense + for j := 0; j < c; j++ { + cv.ColViewOf(v, j) + cv.ScaleVec(1/blas64.Nrm2(cv.mat), &cv) + } + + b := make([]Dense, len(m)) + biT := getDenseWorkspace(c, r, false) + defer putDenseWorkspace(biT) + for i, d := range m { + // All calls to reset will leave an emptied + // matrix with capacity to store the result + // without additional allocation. + biT.Reset() + gsvd.err = biT.Solve(v, d.T()) + if gsvd.err != nil { + return false + } + b[i].CloneFrom(biT.T()) + } + + gsvd.n = len(m) + gsvd.v = v + gsvd.b = b + return true +} + +// Err returns the reason for a factorization failure. +func (gsvd *HOGSVD) Err() error { + return gsvd.err +} + +// Len returns the number of matrices that have been factorized. If Len returns +// zero, the factorization was not successful. +func (gsvd *HOGSVD) Len() int { + return gsvd.n +} + +// UTo extracts the matrix U_n from the singular value decomposition, storing +// the result in-place into dst. U_n is size r×c. +// +// If dst is empty, UTo will resize dst to be r×c. When dst is +// non-empty, UTo will panic if dst is not r×c. UTo will also +// panic if the receiver does not contain a successful factorization. +func (gsvd *HOGSVD) UTo(dst *Dense, n int) { + if !gsvd.succFact() { + panic(badFact) + } + if n < 0 || gsvd.n <= n { + panic("hogsvd: invalid index") + } + r, c := gsvd.b[n].Dims() + if dst.IsEmpty() { + dst.ReuseAs(r, c) + } else { + r2, c2 := dst.Dims() + if r != r2 || c != c2 { + panic(ErrShape) + } + } + dst.Copy(&gsvd.b[n]) + var v VecDense + for j, f := range gsvd.Values(nil, n) { + v.ColViewOf(dst, j) + v.ScaleVec(1/f, &v) + } +} + +// Values returns the nth set of singular values of the factorized system. +// If the input slice is non-nil, the values will be stored in-place into the slice. +// In this case, the slice must have length c, and Values will panic with +// ErrSliceLengthMismatch otherwise. If the input slice is nil, +// a new slice of the appropriate length will be allocated and returned. +// +// Values will panic if the receiver does not contain a successful factorization. +func (gsvd *HOGSVD) Values(s []float64, n int) []float64 { + if !gsvd.succFact() { + panic(badFact) + } + if n < 0 || gsvd.n <= n { + panic("hogsvd: invalid index") + } + + _, c := gsvd.b[n].Dims() + if s == nil { + s = make([]float64, c) + } else if len(s) != c { + panic(ErrSliceLengthMismatch) + } + var v VecDense + for j := 0; j < c; j++ { + v.ColViewOf(&gsvd.b[n], j) + s[j] = blas64.Nrm2(v.mat) + } + return s +} + +// VTo extracts the matrix V from the singular value decomposition, storing +// the result in-place into dst. V is size c×c. +// +// If dst is empty, VTo will resize dst to be c×c. When dst is +// non-empty, VTo will panic if dst is not c×c. VTo will also +// panic if the receiver does not contain a successful factorization. 
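+//
+// A minimal usage sketch, assuming m0, m1 and m2 are column-tall matrices
+// sharing the same column count (illustrative names):
+//
+//	var gsvd mat.HOGSVD
+//	if !gsvd.Factorize(m0, m1, m2) {
+//		// inspect gsvd.Err() for the failure reason
+//	}
+//	var v mat.Dense
+//	gsvd.VTo(&v)                 // shared right factor V
+//	vals0 := gsvd.Values(nil, 0) // singular values of the first matrix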
+func (gsvd *HOGSVD) VTo(dst *Dense) { + if !gsvd.succFact() { + panic(badFact) + } + r, c := gsvd.v.Dims() + if dst.IsEmpty() { + dst.ReuseAs(r, c) + } else { + r2, c2 := dst.Dims() + if r != r2 || c != c2 { + panic(ErrShape) + } + } + dst.Copy(gsvd.v) +} diff --git a/vendor/gonum.org/v1/gonum/mat/index_bound_checks.go b/vendor/gonum.org/v1/gonum/mat/index_bound_checks.go new file mode 100644 index 0000000000..59a9e04788 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/index_bound_checks.go @@ -0,0 +1,398 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file must be kept in sync with index_no_bound_checks.go. + +//go:build bounds +// +build bounds + +package mat + +// At returns the element at row i, column j. +func (m *Dense) At(i, j int) float64 { + return m.at(i, j) +} + +func (m *Dense) at(i, j int) float64 { + if uint(i) >= uint(m.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(m.mat.Cols) { + panic(ErrColAccess) + } + return m.mat.Data[i*m.mat.Stride+j] +} + +// Set sets the element at row i, column j to the value v. +func (m *Dense) Set(i, j int, v float64) { + m.set(i, j, v) +} + +func (m *Dense) set(i, j int, v float64) { + if uint(i) >= uint(m.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(m.mat.Cols) { + panic(ErrColAccess) + } + m.mat.Data[i*m.mat.Stride+j] = v +} + +// At returns the element at row i, column j. +func (m *CDense) At(i, j int) complex128 { + return m.at(i, j) +} + +func (m *CDense) at(i, j int) complex128 { + if uint(i) >= uint(m.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(m.mat.Cols) { + panic(ErrColAccess) + } + return m.mat.Data[i*m.mat.Stride+j] +} + +// Set sets the element at row i, column j to the value v. +func (m *CDense) Set(i, j int, v complex128) { + m.set(i, j, v) +} + +func (m *CDense) set(i, j int, v complex128) { + if uint(i) >= uint(m.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(m.mat.Cols) { + panic(ErrColAccess) + } + m.mat.Data[i*m.mat.Stride+j] = v +} + +// At returns the element at row i. +// It panics if i is out of bounds or if j is not zero. +func (v *VecDense) At(i, j int) float64 { + if j != 0 { + panic(ErrColAccess) + } + return v.at(i) +} + +// AtVec returns the element at row i. +// It panics if i is out of bounds. +func (v *VecDense) AtVec(i int) float64 { + return v.at(i) +} + +func (v *VecDense) at(i int) float64 { + if uint(i) >= uint(v.mat.N) { + panic(ErrRowAccess) + } + return v.mat.Data[i*v.mat.Inc] +} + +// SetVec sets the element at row i to the value val. +// It panics if i is out of bounds. +func (v *VecDense) SetVec(i int, val float64) { + v.setVec(i, val) +} + +func (v *VecDense) setVec(i int, val float64) { + if uint(i) >= uint(v.mat.N) { + panic(ErrVectorAccess) + } + v.mat.Data[i*v.mat.Inc] = val +} + +// At returns the element at row i and column j. +func (t *SymDense) At(i, j int) float64 { + return t.at(i, j) +} + +func (t *SymDense) at(i, j int) float64 { + if uint(i) >= uint(t.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(t.mat.N) { + panic(ErrColAccess) + } + if i > j { + i, j = j, i + } + return t.mat.Data[i*t.mat.Stride+j] +} + +// SetSym sets the elements at (i,j) and (j,i) to the value v. 
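+//
+// For example, assuming a freshly allocated matrix:
+//
+//	s := mat.NewSymDense(2, nil)
+//	s.SetSym(0, 1, 3)
+//	// Both s.At(0, 1) and s.At(1, 0) now return 3.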
+func (t *SymDense) SetSym(i, j int, v float64) { + t.set(i, j, v) +} + +func (t *SymDense) set(i, j int, v float64) { + if uint(i) >= uint(t.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(t.mat.N) { + panic(ErrColAccess) + } + if i > j { + i, j = j, i + } + t.mat.Data[i*t.mat.Stride+j] = v +} + +// At returns the element at row i, column j. +func (t *TriDense) At(i, j int) float64 { + return t.at(i, j) +} + +func (t *TriDense) at(i, j int) float64 { + if uint(i) >= uint(t.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(t.mat.N) { + panic(ErrColAccess) + } + isUpper := t.isUpper() + if (isUpper && i > j) || (!isUpper && i < j) { + return 0 + } + return t.mat.Data[i*t.mat.Stride+j] +} + +// SetTri sets the element of the triangular matrix at row i, column j to the value v. +// It panics if the location is outside the appropriate half of the matrix. +func (t *TriDense) SetTri(i, j int, v float64) { + t.set(i, j, v) +} + +func (t *TriDense) set(i, j int, v float64) { + if uint(i) >= uint(t.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(t.mat.N) { + panic(ErrColAccess) + } + isUpper := t.isUpper() + if (isUpper && i > j) || (!isUpper && i < j) { + panic(ErrTriangleSet) + } + t.mat.Data[i*t.mat.Stride+j] = v +} + +// At returns the element at row i, column j. +func (b *BandDense) At(i, j int) float64 { + return b.at(i, j) +} + +func (b *BandDense) at(i, j int) float64 { + if uint(i) >= uint(b.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(b.mat.Cols) { + panic(ErrColAccess) + } + pj := j + b.mat.KL - i + if pj < 0 || b.mat.KL+b.mat.KU+1 <= pj { + return 0 + } + return b.mat.Data[i*b.mat.Stride+pj] +} + +// SetBand sets the element at row i, column j to the value v. +// It panics if the location is outside the appropriate region of the matrix. +func (b *BandDense) SetBand(i, j int, v float64) { + b.set(i, j, v) +} + +func (b *BandDense) set(i, j int, v float64) { + if uint(i) >= uint(b.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(b.mat.Cols) { + panic(ErrColAccess) + } + pj := j + b.mat.KL - i + if pj < 0 || b.mat.KL+b.mat.KU+1 <= pj { + panic(ErrBandSet) + } + b.mat.Data[i*b.mat.Stride+pj] = v +} + +// At returns the element at row i, column j. +func (s *SymBandDense) At(i, j int) float64 { + return s.at(i, j) +} + +func (s *SymBandDense) at(i, j int) float64 { + if uint(i) >= uint(s.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(s.mat.N) { + panic(ErrColAccess) + } + if i > j { + i, j = j, i + } + pj := j - i + if s.mat.K+1 <= pj { + return 0 + } + return s.mat.Data[i*s.mat.Stride+pj] +} + +// SetSymBand sets the element at row i, column j to the value v. +// It panics if the location is outside the appropriate region of the matrix. +func (s *SymBandDense) SetSymBand(i, j int, v float64) { + s.set(i, j, v) +} + +func (s *SymBandDense) set(i, j int, v float64) { + if uint(i) >= uint(s.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(s.mat.N) { + panic(ErrColAccess) + } + if i > j { + i, j = j, i + } + pj := j - i + if s.mat.K+1 <= pj { + panic(ErrBandSet) + } + s.mat.Data[i*s.mat.Stride+pj] = v +} + +func (t *TriBandDense) At(i, j int) float64 { + return t.at(i, j) +} + +func (t *TriBandDense) at(i, j int) float64 { + // TODO(btracey): Support Diag field, see #692. 
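+	// A triangular band matrix stores element (i, j) in compact band form at
+	// column j+kl-i of row i in the data slice, where kl counts sub-diagonals
+	// (lower triangular case) and ku counts super-diagonals (upper triangular
+	// case); indices that fall outside the band read as zero.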
+ if uint(i) >= uint(t.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(t.mat.N) { + panic(ErrColAccess) + } + isUpper := t.isUpper() + if (isUpper && i > j) || (!isUpper && i < j) { + return 0 + } + kl, ku := t.mat.K, 0 + if isUpper { + kl, ku = 0, t.mat.K + } + pj := j + kl - i + if pj < 0 || kl+ku+1 <= pj { + return 0 + } + return t.mat.Data[i*t.mat.Stride+pj] +} + +func (t *TriBandDense) SetTriBand(i, j int, v float64) { + t.setTriBand(i, j, v) +} + +func (t *TriBandDense) setTriBand(i, j int, v float64) { + if uint(i) >= uint(t.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(t.mat.N) { + panic(ErrColAccess) + } + isUpper := t.isUpper() + if (isUpper && i > j) || (!isUpper && i < j) { + panic(ErrTriangleSet) + } + kl, ku := t.mat.K, 0 + if isUpper { + kl, ku = 0, t.mat.K + } + pj := j + kl - i + if pj < 0 || kl+ku+1 <= pj { + panic(ErrBandSet) + } + // TODO(btracey): Support Diag field, see #692. + t.mat.Data[i*t.mat.Stride+pj] = v +} + +// At returns the element at row i, column j. +func (d *DiagDense) At(i, j int) float64 { + return d.at(i, j) +} + +func (d *DiagDense) at(i, j int) float64 { + if uint(i) >= uint(d.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(d.mat.N) { + panic(ErrColAccess) + } + if i != j { + return 0 + } + return d.mat.Data[i*d.mat.Inc] +} + +// SetDiag sets the element at row i, column i to the value v. +// It panics if the location is outside the appropriate region of the matrix. +func (d *DiagDense) SetDiag(i int, v float64) { + d.setDiag(i, v) +} + +func (d *DiagDense) setDiag(i int, v float64) { + if uint(i) >= uint(d.mat.N) { + panic(ErrRowAccess) + } + d.mat.Data[i*d.mat.Inc] = v +} + +// At returns the element at row i, column j. +func (a *Tridiag) At(i, j int) float64 { + return a.at(i, j) +} + +func (a *Tridiag) at(i, j int) float64 { + if uint(i) >= uint(a.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(a.mat.N) { + panic(ErrColAccess) + } + switch i - j { + case -1: + return a.mat.DU[i] + case 0: + return a.mat.D[i] + case 1: + return a.mat.DL[j] + default: + return 0 + } +} + +// SetBand sets the element at row i, column j to the value v. +// It panics if the location is outside the appropriate region of the matrix. +func (a *Tridiag) SetBand(i, j int, v float64) { + a.set(i, j, v) +} + +func (a *Tridiag) set(i, j int, v float64) { + if uint(i) >= uint(a.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(a.mat.N) { + panic(ErrColAccess) + } + switch i - j { + case -1: + a.mat.DU[i] = v + case 0: + a.mat.D[i] = v + case 1: + a.mat.DL[j] = v + default: + panic(ErrBandSet) + } +} diff --git a/vendor/gonum.org/v1/gonum/mat/index_no_bound_checks.go b/vendor/gonum.org/v1/gonum/mat/index_no_bound_checks.go new file mode 100644 index 0000000000..335128806f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/index_no_bound_checks.go @@ -0,0 +1,400 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file must be kept in sync with index_bound_checks.go. + +//go:build !bounds +// +build !bounds + +package mat + +// At returns the element at row i, column j. +func (m *Dense) At(i, j int) float64 { + if uint(i) >= uint(m.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(m.mat.Cols) { + panic(ErrColAccess) + } + return m.at(i, j) +} + +func (m *Dense) at(i, j int) float64 { + return m.mat.Data[i*m.mat.Stride+j] +} + +// Set sets the element at row i, column j to the value v. 
+func (m *Dense) Set(i, j int, v float64) { + if uint(i) >= uint(m.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(m.mat.Cols) { + panic(ErrColAccess) + } + m.set(i, j, v) +} + +func (m *Dense) set(i, j int, v float64) { + m.mat.Data[i*m.mat.Stride+j] = v +} + +// At returns the element at row i, column j. +func (m *CDense) At(i, j int) complex128 { + if uint(i) >= uint(m.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(m.mat.Cols) { + panic(ErrColAccess) + } + return m.at(i, j) +} + +func (m *CDense) at(i, j int) complex128 { + return m.mat.Data[i*m.mat.Stride+j] +} + +// Set sets the element at row i, column j to the value v. +func (m *CDense) Set(i, j int, v complex128) { + if uint(i) >= uint(m.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(m.mat.Cols) { + panic(ErrColAccess) + } + m.set(i, j, v) +} + +func (m *CDense) set(i, j int, v complex128) { + m.mat.Data[i*m.mat.Stride+j] = v +} + +// At returns the element at row i. +// It panics if i is out of bounds or if j is not zero. +func (v *VecDense) At(i, j int) float64 { + if uint(i) >= uint(v.mat.N) { + panic(ErrRowAccess) + } + if j != 0 { + panic(ErrColAccess) + } + return v.at(i) +} + +// AtVec returns the element at row i. +// It panics if i is out of bounds. +func (v *VecDense) AtVec(i int) float64 { + if uint(i) >= uint(v.mat.N) { + panic(ErrRowAccess) + } + return v.at(i) +} + +func (v *VecDense) at(i int) float64 { + return v.mat.Data[i*v.mat.Inc] +} + +// SetVec sets the element at row i to the value val. +// It panics if i is out of bounds. +func (v *VecDense) SetVec(i int, val float64) { + if uint(i) >= uint(v.mat.N) { + panic(ErrVectorAccess) + } + v.setVec(i, val) +} + +func (v *VecDense) setVec(i int, val float64) { + v.mat.Data[i*v.mat.Inc] = val +} + +// At returns the element at row i and column j. +func (s *SymDense) At(i, j int) float64 { + if uint(i) >= uint(s.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(s.mat.N) { + panic(ErrColAccess) + } + return s.at(i, j) +} + +func (s *SymDense) at(i, j int) float64 { + if i > j { + i, j = j, i + } + return s.mat.Data[i*s.mat.Stride+j] +} + +// SetSym sets the elements at (i,j) and (j,i) to the value v. +func (s *SymDense) SetSym(i, j int, v float64) { + if uint(i) >= uint(s.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(s.mat.N) { + panic(ErrColAccess) + } + s.set(i, j, v) +} + +func (s *SymDense) set(i, j int, v float64) { + if i > j { + i, j = j, i + } + s.mat.Data[i*s.mat.Stride+j] = v +} + +// At returns the element at row i, column j. +func (t *TriDense) At(i, j int) float64 { + if uint(i) >= uint(t.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(t.mat.N) { + panic(ErrColAccess) + } + return t.at(i, j) +} + +func (t *TriDense) at(i, j int) float64 { + isUpper := t.triKind() + if (isUpper && i > j) || (!isUpper && i < j) { + return 0 + } + return t.mat.Data[i*t.mat.Stride+j] +} + +// SetTri sets the element at row i, column j to the value v. +// It panics if the location is outside the appropriate half of the matrix. +func (t *TriDense) SetTri(i, j int, v float64) { + if uint(i) >= uint(t.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(t.mat.N) { + panic(ErrColAccess) + } + isUpper := t.isUpper() + if (isUpper && i > j) || (!isUpper && i < j) { + panic(ErrTriangleSet) + } + t.set(i, j, v) +} + +func (t *TriDense) set(i, j int, v float64) { + t.mat.Data[i*t.mat.Stride+j] = v +} + +// At returns the element at row i, column j. 
+func (b *BandDense) At(i, j int) float64 { + if uint(i) >= uint(b.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(b.mat.Cols) { + panic(ErrColAccess) + } + return b.at(i, j) +} + +func (b *BandDense) at(i, j int) float64 { + pj := j + b.mat.KL - i + if pj < 0 || b.mat.KL+b.mat.KU+1 <= pj { + return 0 + } + return b.mat.Data[i*b.mat.Stride+pj] +} + +// SetBand sets the element at row i, column j to the value v. +// It panics if the location is outside the appropriate region of the matrix. +func (b *BandDense) SetBand(i, j int, v float64) { + if uint(i) >= uint(b.mat.Rows) { + panic(ErrRowAccess) + } + if uint(j) >= uint(b.mat.Cols) { + panic(ErrColAccess) + } + pj := j + b.mat.KL - i + if pj < 0 || b.mat.KL+b.mat.KU+1 <= pj { + panic(ErrBandSet) + } + b.set(i, j, v) +} + +func (b *BandDense) set(i, j int, v float64) { + pj := j + b.mat.KL - i + b.mat.Data[i*b.mat.Stride+pj] = v +} + +// At returns the element at row i, column j. +func (s *SymBandDense) At(i, j int) float64 { + if uint(i) >= uint(s.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(s.mat.N) { + panic(ErrColAccess) + } + return s.at(i, j) +} + +func (s *SymBandDense) at(i, j int) float64 { + if i > j { + i, j = j, i + } + pj := j - i + if s.mat.K+1 <= pj { + return 0 + } + return s.mat.Data[i*s.mat.Stride+pj] +} + +// SetSymBand sets the element at row i, column j to the value v. +// It panics if the location is outside the appropriate region of the matrix. +func (s *SymBandDense) SetSymBand(i, j int, v float64) { + if uint(i) >= uint(s.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(s.mat.N) { + panic(ErrColAccess) + } + s.set(i, j, v) +} + +func (s *SymBandDense) set(i, j int, v float64) { + if i > j { + i, j = j, i + } + pj := j - i + if s.mat.K+1 <= pj { + panic(ErrBandSet) + } + s.mat.Data[i*s.mat.Stride+pj] = v +} + +func (t *TriBandDense) At(i, j int) float64 { + if uint(i) >= uint(t.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(t.mat.N) { + panic(ErrColAccess) + } + return t.at(i, j) +} + +func (t *TriBandDense) at(i, j int) float64 { + // TODO(btracey): Support Diag field, see #692. + isUpper := t.isUpper() + if (isUpper && i > j) || (!isUpper && i < j) { + return 0 + } + kl := t.mat.K + ku := 0 + if isUpper { + ku = t.mat.K + kl = 0 + } + pj := j + kl - i + if pj < 0 || kl+ku+1 <= pj { + return 0 + } + return t.mat.Data[i*t.mat.Stride+pj] +} + +func (t *TriBandDense) SetTriBand(i, j int, v float64) { + if uint(i) >= uint(t.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(t.mat.N) { + panic(ErrColAccess) + } + isUpper := t.isUpper() + if (isUpper && i > j) || (!isUpper && i < j) { + panic(ErrTriangleSet) + } + kl, ku := t.mat.K, 0 + if isUpper { + kl, ku = 0, t.mat.K + } + pj := j + kl - i + if pj < 0 || kl+ku+1 <= pj { + panic(ErrBandSet) + } + // TODO(btracey): Support Diag field, see #692. + t.mat.Data[i*t.mat.Stride+pj] = v +} + +// At returns the element at row i, column j. +func (d *DiagDense) At(i, j int) float64 { + if uint(i) >= uint(d.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(d.mat.N) { + panic(ErrColAccess) + } + return d.at(i, j) +} + +func (d *DiagDense) at(i, j int) float64 { + if i != j { + return 0 + } + return d.mat.Data[i*d.mat.Inc] +} + +// SetDiag sets the element at row i, column i to the value v. +// It panics if the location is outside the appropriate region of the matrix. 
+func (d *DiagDense) SetDiag(i int, v float64) { + if uint(i) >= uint(d.mat.N) { + panic(ErrRowAccess) + } + d.setDiag(i, v) +} + +func (d *DiagDense) setDiag(i int, v float64) { + d.mat.Data[i*d.mat.Inc] = v +} + +// At returns the element at row i, column j. +func (a *Tridiag) At(i, j int) float64 { + if uint(i) >= uint(a.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(a.mat.N) { + panic(ErrColAccess) + } + return a.at(i, j) +} + +func (a *Tridiag) at(i, j int) float64 { + switch i - j { + case -1: + return a.mat.DU[i] + case 0: + return a.mat.D[i] + case 1: + return a.mat.DL[j] + default: + return 0 + } +} + +// SetBand sets the element at row i, column j to the value v. +// It panics if the location is outside the appropriate region of the matrix. +func (a *Tridiag) SetBand(i, j int, v float64) { + if uint(i) >= uint(a.mat.N) { + panic(ErrRowAccess) + } + if uint(j) >= uint(a.mat.N) { + panic(ErrColAccess) + } + a.set(i, j, v) +} + +func (a *Tridiag) set(i, j int, v float64) { + switch i - j { + case -1: + a.mat.DU[i] = v + case 0: + a.mat.D[i] = v + case 1: + a.mat.DL[j] = v + default: + panic(ErrBandSet) + } +} diff --git a/vendor/gonum.org/v1/gonum/mat/inner.go b/vendor/gonum.org/v1/gonum/mat/inner.go new file mode 100644 index 0000000000..4f94a96a6b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/inner.go @@ -0,0 +1,126 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/internal/asm/f64" +) + +// Inner computes the generalized inner product +// +// xᵀ A y +// +// between the vectors x and y with matrix A, where x and y are treated as +// column vectors. +// +// This is only a true inner product if A is symmetric positive definite, though +// the operation works for any matrix A. +// +// Inner panics if x.Len != m or y.Len != n when A is an m x n matrix. +func Inner(x Vector, a Matrix, y Vector) float64 { + m, n := a.Dims() + if x.Len() != m { + panic(ErrShape) + } + if y.Len() != n { + panic(ErrShape) + } + if m == 0 || n == 0 { + return 0 + } + + var sum float64 + + switch a := a.(type) { + case RawSymmetricer: + amat := a.RawSymmetric() + if amat.Uplo != blas.Upper { + // Panic as a string not a mat.Error. 
+ panic(badSymTriangle) + } + var xmat, ymat blas64.Vector + if xrv, ok := x.(RawVectorer); ok { + xmat = xrv.RawVector() + } else { + break + } + if yrv, ok := y.(RawVectorer); ok { + ymat = yrv.RawVector() + } else { + break + } + for i := 0; i < x.Len(); i++ { + xi := x.AtVec(i) + if xi != 0 { + if ymat.Inc == 1 { + sum += xi * f64.DotUnitary( + amat.Data[i*amat.Stride+i:i*amat.Stride+n], + ymat.Data[i:], + ) + } else { + sum += xi * f64.DotInc( + amat.Data[i*amat.Stride+i:i*amat.Stride+n], + ymat.Data[i*ymat.Inc:], uintptr(n-i), + 1, uintptr(ymat.Inc), + 0, 0, + ) + } + } + yi := y.AtVec(i) + if i != n-1 && yi != 0 { + if xmat.Inc == 1 { + sum += yi * f64.DotUnitary( + amat.Data[i*amat.Stride+i+1:i*amat.Stride+n], + xmat.Data[i+1:], + ) + } else { + sum += yi * f64.DotInc( + amat.Data[i*amat.Stride+i+1:i*amat.Stride+n], + xmat.Data[(i+1)*xmat.Inc:], uintptr(n-i-1), + 1, uintptr(xmat.Inc), + 0, 0, + ) + } + } + } + return sum + case RawMatrixer: + amat := a.RawMatrix() + var ymat blas64.Vector + if yrv, ok := y.(RawVectorer); ok { + ymat = yrv.RawVector() + } else { + break + } + for i := 0; i < x.Len(); i++ { + xi := x.AtVec(i) + if xi != 0 { + if ymat.Inc == 1 { + sum += xi * f64.DotUnitary( + amat.Data[i*amat.Stride:i*amat.Stride+n], + ymat.Data, + ) + } else { + sum += xi * f64.DotInc( + amat.Data[i*amat.Stride:i*amat.Stride+n], + ymat.Data, uintptr(n), + 1, uintptr(ymat.Inc), + 0, 0, + ) + } + } + } + return sum + } + for i := 0; i < x.Len(); i++ { + xi := x.AtVec(i) + for j := 0; j < y.Len(); j++ { + sum += xi * a.At(i, j) * y.AtVec(j) + } + } + return sum +} diff --git a/vendor/gonum.org/v1/gonum/mat/io.go b/vendor/gonum.org/v1/gonum/mat/io.go new file mode 100644 index 0000000000..0641fa28b6 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/io.go @@ -0,0 +1,495 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + "math" +) + +// version is the current on-disk codec version. +const version uint32 = 0x1 + +// maxLen is the biggest slice/array len one can create on a 32/64b platform. +const maxLen = int64(int(^uint(0) >> 1)) + +var ( + headerSize = binary.Size(storage{}) + sizeFloat64 = binary.Size(float64(0)) + + errWrongType = errors.New("mat: wrong data type") + + errTooBig = errors.New("mat: resulting data slice too big") + errTooSmall = errors.New("mat: input slice too small") + errBadBuffer = errors.New("mat: data buffer size mismatch") + errBadSize = errors.New("mat: invalid dimension") +) + +// Type encoding scheme: +// +// Type Form Packing Uplo Unit Rows Columns kU kL +// uint8 [GST] uint8 [BPF] uint8 [AUL] bool int64 int64 int64 int64 +// General 'G' 'F' 'A' false r c 0 0 +// Band 'G' 'B' 'A' false r c kU kL +// Symmetric 'S' 'F' ul false n n 0 0 +// SymmetricBand 'S' 'B' ul false n n k k +// SymmetricPacked 'S' 'P' ul false n n 0 0 +// Triangular 'T' 'F' ul Diag==Unit n n 0 0 +// TriangularBand 'T' 'B' ul Diag==Unit n n k k +// TriangularPacked 'T' 'P' ul Diag==Unit n n 0 0 +// +// G - general, S - symmetric, T - triangular +// F - full, B - band, P - packed +// A - all, U - upper, L - lower + +// MarshalBinary encodes the receiver into a binary form and returns the result. 
+// +// Dense is little-endian encoded as follows: +// +// 0 - 3 Version = 1 (uint32) +// 4 'G' (byte) +// 5 'F' (byte) +// 6 'A' (byte) +// 7 0 (byte) +// 8 - 15 number of rows (int64) +// 16 - 23 number of columns (int64) +// 24 - 31 0 (int64) +// 32 - 39 0 (int64) +// 40 - .. matrix data elements (float64) +// [0,0] [0,1] ... [0,ncols-1] +// [1,0] [1,1] ... [1,ncols-1] +// ... +// [nrows-1,0] ... [nrows-1,ncols-1] +func (m Dense) MarshalBinary() ([]byte, error) { + bufLen := int64(headerSize) + int64(m.mat.Rows)*int64(m.mat.Cols)*int64(sizeFloat64) + if bufLen <= 0 { + // bufLen is too big and has wrapped around. + return nil, errTooBig + } + + header := storage{ + Form: 'G', Packing: 'F', Uplo: 'A', + Rows: int64(m.mat.Rows), Cols: int64(m.mat.Cols), + Version: version, + } + buf := make([]byte, bufLen) + n, err := header.marshalBinaryTo(bytes.NewBuffer(buf[:0])) + if err != nil { + return buf[:n], err + } + + p := headerSize + r, c := m.Dims() + for i := 0; i < r; i++ { + for j := 0; j < c; j++ { + binary.LittleEndian.PutUint64(buf[p:p+sizeFloat64], math.Float64bits(m.at(i, j))) + p += sizeFloat64 + } + } + + return buf, nil +} + +// MarshalBinaryTo encodes the receiver into a binary form and writes it into w. +// MarshalBinaryTo returns the number of bytes written into w and an error, if any. +// +// See MarshalBinary for the on-disk layout. +func (m Dense) MarshalBinaryTo(w io.Writer) (int, error) { + header := storage{ + Form: 'G', Packing: 'F', Uplo: 'A', + Rows: int64(m.mat.Rows), Cols: int64(m.mat.Cols), + Version: version, + } + n, err := header.marshalBinaryTo(w) + if err != nil { + return n, err + } + + r, c := m.Dims() + var b [8]byte + for i := 0; i < r; i++ { + for j := 0; j < c; j++ { + binary.LittleEndian.PutUint64(b[:], math.Float64bits(m.at(i, j))) + nn, err := w.Write(b[:]) + n += nn + if err != nil { + return n, err + } + } + } + + return n, nil +} + +// UnmarshalBinary decodes the binary form into the receiver. +// It panics if the receiver is a non-empty Dense matrix. +// +// See MarshalBinary for the on-disk layout. +// +// Limited checks on the validity of the binary input are performed: +// - ErrShape is returned if the number of rows or columns is negative, +// - an error is returned if the resulting Dense matrix is too +// big for the current architecture (e.g. a 16GB matrix written by a +// 64b application and read back from a 32b application.) +// +// UnmarshalBinary does not limit the size of the unmarshaled matrix, and so +// it should not be used on untrusted data. 
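+//
+// A round-trip sketch, assuming the data came from MarshalBinary:
+//
+//	src := mat.NewDense(2, 2, []float64{1, 2, 3, 4})
+//	buf, err := src.MarshalBinary()
+//	if err != nil {
+//		// handle the encoding error
+//	}
+//	var dst mat.Dense // the receiver must be empty
+//	err = dst.UnmarshalBinary(buf)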
+func (m *Dense) UnmarshalBinary(data []byte) error { + if !m.IsEmpty() { + panic("mat: unmarshal into non-empty matrix") + } + + if len(data) < headerSize { + return errTooSmall + } + + var header storage + err := header.unmarshalBinary(data[:headerSize]) + if err != nil { + return err + } + rows := header.Rows + cols := header.Cols + header.Version = 0 + header.Rows = 0 + header.Cols = 0 + if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) { + return errWrongType + } + if rows < 0 || cols < 0 { + return errBadSize + } + size := rows * cols + if size == 0 { + return ErrZeroLength + } + if int(size) < 0 || size > maxLen { + return errTooBig + } + if len(data) != headerSize+int(rows*cols)*sizeFloat64 { + return errBadBuffer + } + + p := headerSize + m.reuseAsNonZeroed(int(rows), int(cols)) + for i := range m.mat.Data { + m.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(data[p : p+sizeFloat64])) + p += sizeFloat64 + } + + return nil +} + +// UnmarshalBinaryFrom decodes the binary form into the receiver and returns +// the number of bytes read and an error if any. +// It panics if the receiver is a non-empty Dense matrix. +// +// See MarshalBinary for the on-disk layout. +// +// Limited checks on the validity of the binary input are performed: +// - ErrShape is returned if the number of rows or columns is negative, +// - an error is returned if the resulting Dense matrix is too +// big for the current architecture (e.g. a 16GB matrix written by a +// 64b application and read back from a 32b application.) +// +// UnmarshalBinary does not limit the size of the unmarshaled matrix, and so +// it should not be used on untrusted data. +func (m *Dense) UnmarshalBinaryFrom(r io.Reader) (int, error) { + if !m.IsEmpty() { + panic("mat: unmarshal into non-empty matrix") + } + + var header storage + n, err := header.unmarshalBinaryFrom(r) + if err != nil { + return n, err + } + rows := header.Rows + cols := header.Cols + header.Version = 0 + header.Rows = 0 + header.Cols = 0 + if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) { + return n, errWrongType + } + if rows < 0 || cols < 0 { + return n, errBadSize + } + size := rows * cols + if size == 0 { + return n, ErrZeroLength + } + if int(size) < 0 || size > maxLen { + return n, errTooBig + } + + m.reuseAsNonZeroed(int(rows), int(cols)) + var b [8]byte + for i := range m.mat.Data { + nn, err := readFull(r, b[:]) + n += nn + if err != nil { + if err == io.EOF { + return n, io.ErrUnexpectedEOF + } + return n, err + } + m.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(b[:])) + } + + return n, nil +} + +// MarshalBinary encodes the receiver into a binary form and returns the result. +// +// VecDense is little-endian encoded as follows: +// +// 0 - 3 Version = 1 (uint32) +// 4 'G' (byte) +// 5 'F' (byte) +// 6 'A' (byte) +// 7 0 (byte) +// 8 - 15 number of elements (int64) +// 16 - 23 1 (int64) +// 24 - 31 0 (int64) +// 32 - 39 0 (int64) +// 40 - .. vector's data elements (float64) +func (v VecDense) MarshalBinary() ([]byte, error) { + bufLen := int64(headerSize) + int64(v.mat.N)*int64(sizeFloat64) + if bufLen <= 0 { + // bufLen is too big and has wrapped around. 
+ return nil, errTooBig + } + + header := storage{ + Form: 'G', Packing: 'F', Uplo: 'A', + Rows: int64(v.mat.N), Cols: 1, + Version: version, + } + buf := make([]byte, bufLen) + n, err := header.marshalBinaryTo(bytes.NewBuffer(buf[:0])) + if err != nil { + return buf[:n], err + } + + p := headerSize + for i := 0; i < v.mat.N; i++ { + binary.LittleEndian.PutUint64(buf[p:p+sizeFloat64], math.Float64bits(v.at(i))) + p += sizeFloat64 + } + + return buf, nil +} + +// MarshalBinaryTo encodes the receiver into a binary form, writes it to w and +// returns the number of bytes written and an error if any. +// +// See MarshalBinary for the on-disk format. +func (v VecDense) MarshalBinaryTo(w io.Writer) (int, error) { + header := storage{ + Form: 'G', Packing: 'F', Uplo: 'A', + Rows: int64(v.mat.N), Cols: 1, + Version: version, + } + n, err := header.marshalBinaryTo(w) + if err != nil { + return n, err + } + + var buf [8]byte + for i := 0; i < v.mat.N; i++ { + binary.LittleEndian.PutUint64(buf[:], math.Float64bits(v.at(i))) + nn, err := w.Write(buf[:]) + n += nn + if err != nil { + return n, err + } + } + + return n, nil +} + +// UnmarshalBinary decodes the binary form into the receiver. +// It panics if the receiver is a non-empty VecDense. +// +// See MarshalBinary for the on-disk layout. +// +// Limited checks on the validity of the binary input are performed: +// - ErrShape is returned if the number of rows is negative, +// - an error is returned if the resulting VecDense is too +// big for the current architecture (e.g. a 16GB vector written by a +// 64b application and read back from a 32b application.) +// +// UnmarshalBinary does not limit the size of the unmarshaled vector, and so +// it should not be used on untrusted data. +func (v *VecDense) UnmarshalBinary(data []byte) error { + if !v.IsEmpty() { + panic("mat: unmarshal into non-empty vector") + } + + if len(data) < headerSize { + return errTooSmall + } + + var header storage + err := header.unmarshalBinary(data[:headerSize]) + if err != nil { + return err + } + if header.Cols != 1 { + return ErrShape + } + n := header.Rows + header.Version = 0 + header.Rows = 0 + header.Cols = 0 + if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) { + return errWrongType + } + if n == 0 { + return ErrZeroLength + } + if n < 0 { + return errBadSize + } + if int64(maxLen) < n { + return errTooBig + } + if len(data) != headerSize+int(n)*sizeFloat64 { + return errBadBuffer + } + + p := headerSize + v.reuseAsNonZeroed(int(n)) + for i := range v.mat.Data { + v.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(data[p : p+sizeFloat64])) + p += sizeFloat64 + } + + return nil +} + +// UnmarshalBinaryFrom decodes the binary form into the receiver, from the +// io.Reader and returns the number of bytes read and an error if any. +// It panics if the receiver is a non-empty VecDense. +// +// See MarshalBinary for the on-disk layout. +// See UnmarshalBinary for the list of sanity checks performed on the input. 
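+//
+// A stream round-trip sketch, assuming the bytes were produced by
+// MarshalBinaryTo:
+//
+//	v := mat.NewVecDense(3, []float64{1, 2, 3})
+//	var buf bytes.Buffer
+//	if _, err := v.MarshalBinaryTo(&buf); err != nil {
+//		// handle the encoding error
+//	}
+//	var got mat.VecDense
+//	_, err := got.UnmarshalBinaryFrom(&buf)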
+func (v *VecDense) UnmarshalBinaryFrom(r io.Reader) (int, error) { + if !v.IsEmpty() { + panic("mat: unmarshal into non-empty vector") + } + + var header storage + n, err := header.unmarshalBinaryFrom(r) + if err != nil { + return n, err + } + if header.Cols != 1 { + return n, ErrShape + } + l := header.Rows + header.Version = 0 + header.Rows = 0 + header.Cols = 0 + if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) { + return n, errWrongType + } + if l == 0 { + return n, ErrZeroLength + } + if l < 0 { + return n, errBadSize + } + if int64(maxLen) < l { + return n, errTooBig + } + + v.reuseAsNonZeroed(int(l)) + var b [8]byte + for i := range v.mat.Data { + nn, err := readFull(r, b[:]) + n += nn + if err != nil { + if err == io.EOF { + return n, io.ErrUnexpectedEOF + } + return n, err + } + v.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(b[:])) + } + + return n, nil +} + +// storage is the internal representation of the storage format of a +// serialised matrix. +type storage struct { + Version uint32 // Keep this first. + Form byte // [GST] + Packing byte // [BPF] + Uplo byte // [AUL] + Unit bool + Rows int64 + Cols int64 + KU int64 + KL int64 +} + +// TODO(kortschak): Consider replacing these with calls to direct +// encoding/decoding of fields rather than to binary.Write/binary.Read. + +func (s storage) marshalBinaryTo(w io.Writer) (int, error) { + buf := bytes.NewBuffer(make([]byte, 0, headerSize)) + err := binary.Write(buf, binary.LittleEndian, s) + if err != nil { + return 0, err + } + return w.Write(buf.Bytes()) +} + +func (s *storage) unmarshalBinary(buf []byte) error { + err := binary.Read(bytes.NewReader(buf), binary.LittleEndian, s) + if err != nil { + return err + } + if s.Version != version { + return fmt.Errorf("mat: incorrect version: %d", s.Version) + } + return nil +} + +func (s *storage) unmarshalBinaryFrom(r io.Reader) (int, error) { + buf := make([]byte, headerSize) + n, err := readFull(r, buf) + if err != nil { + return n, err + } + return n, s.unmarshalBinary(buf[:n]) +} + +// readFull reads from r into buf until it has read len(buf). +// It returns the number of bytes copied and an error if fewer bytes were read. +// If an EOF happens after reading fewer than len(buf) bytes, io.ErrUnexpectedEOF is returned. +func readFull(r io.Reader, buf []byte) (int, error) { + var n int + var err error + for n < len(buf) && err == nil { + var nn int + nn, err = r.Read(buf[n:]) + n += nn + } + if n == len(buf) { + return n, nil + } + if err == io.EOF { + return n, io.ErrUnexpectedEOF + } + return n, err +} diff --git a/vendor/gonum.org/v1/gonum/mat/lq.go b/vendor/gonum.org/v1/gonum/mat/lq.go new file mode 100644 index 0000000000..a3b3543b08 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/lq.go @@ -0,0 +1,305 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" + "gonum.org/v1/gonum/lapack/lapack64" +) + +const badLQ = "mat: invalid LQ factorization" + +// LQ is a type for creating and using the LQ factorization of a matrix. +type LQ struct { + lq *Dense + q *Dense + tau []float64 + cond float64 +} + +// Dims returns the dimensions of the matrix. +func (lq *LQ) Dims() (r, c int) { + if lq.lq == nil { + return 0, 0 + } + return lq.lq.Dims() +} + +// At returns the element at row i, column j. 
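+// The element is reconstructed on the fly as the dot product of row i of L
+// with column j of Q, so each access costs O(i) multiplications; for repeated
+// access it is cheaper to extract the factors once with LTo and QTo.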
+func (lq *LQ) At(i, j int) float64 {
+	m, n := lq.Dims()
+	if uint(i) >= uint(m) {
+		panic(ErrRowAccess)
+	}
+	if uint(j) >= uint(n) {
+		panic(ErrColAccess)
+	}
+
+	var val float64
+	for k := 0; k <= i; k++ {
+		val += lq.lq.at(i, k) * lq.q.at(k, j)
+	}
+	return val
+}
+
+// T performs an implicit transpose by returning the receiver inside a
+// Transpose.
+func (lq *LQ) T() Matrix {
+	return Transpose{lq}
+}
+
+func (lq *LQ) updateCond(norm lapack.MatrixNorm) {
+	// Since A = L*Q, and Q is orthogonal, we get for the condition number κ
+	//	κ(A) := |A| |A^-1| = |L*Q| |(L*Q)^-1| = |L| |Qᵀ * L^-1|
+	//	      = |L| |L^-1| = κ(L),
+	// where we used the fact that Q^-1 = Qᵀ. However, this assumes that
+	// the matrix norm is invariant under orthogonal transformations which
+	// is not the case for CondNorm. Hopefully the error is negligible: κ
+	// is only a qualitative measure anyway.
+	m := lq.lq.mat.Rows
+	work := getFloat64s(3*m, false)
+	iwork := getInts(m, false)
+	l := lq.lq.asTriDense(m, blas.NonUnit, blas.Lower)
+	v := lapack64.Trcon(norm, l.mat, work, iwork)
+	lq.cond = 1 / v
+	putFloat64s(work)
+	putInts(iwork)
+}
+
+// Factorize computes the LQ factorization of an m×n matrix a where m <= n. The LQ
+// factorization always exists even if A is singular.
+//
+// The LQ decomposition is a factorization of the matrix A such that A = L * Q.
+// The matrix Q is an orthonormal n×n matrix, and L is an m×n lower triangular matrix.
+// L and Q can be extracted using the LTo and QTo methods.
+func (lq *LQ) Factorize(a Matrix) {
+	lq.factorize(a, CondNorm)
+}
+
+func (lq *LQ) factorize(a Matrix, norm lapack.MatrixNorm) {
+	m, n := a.Dims()
+	if m > n {
+		panic(ErrShape)
+	}
+	if lq.lq == nil {
+		lq.lq = &Dense{}
+	}
+	lq.lq.CloneFrom(a)
+	work := []float64{0}
+	lq.tau = make([]float64, m)
+	lapack64.Gelqf(lq.lq.mat, lq.tau, work, -1)
+	work = getFloat64s(int(work[0]), false)
+	lapack64.Gelqf(lq.lq.mat, lq.tau, work, len(work))
+	putFloat64s(work)
+	lq.updateCond(norm)
+	lq.updateQ()
+}
+
+func (lq *LQ) updateQ() {
+	_, n := lq.Dims()
+	if lq.q == nil {
+		lq.q = NewDense(n, n, nil)
+	} else {
+		lq.q.reuseAsNonZeroed(n, n)
+	}
+	// Construct Q from the elementary reflectors.
+	lq.q.Copy(lq.lq)
+	work := []float64{0}
+	lapack64.Orglq(lq.q.mat, lq.tau, work, -1)
+	work = getFloat64s(int(work[0]), false)
+	lapack64.Orglq(lq.q.mat, lq.tau, work, len(work))
+	putFloat64s(work)
+}
+
+// isValid returns whether the receiver contains a factorization.
+func (lq *LQ) isValid() bool {
+	return lq.lq != nil && !lq.lq.IsEmpty()
+}
+
+// Cond returns the condition number for the factorized matrix.
+// Cond will panic if the receiver does not contain a factorization.
+func (lq *LQ) Cond() float64 {
+	if !lq.isValid() {
+		panic(badLQ)
+	}
+	return lq.cond
+}
+
+// TODO(btracey): Add in the "Reduced" forms for extracting the m×m orthogonal
+// and upper triangular matrices.
+
+// LTo extracts the m×n lower trapezoidal matrix from an LQ decomposition.
+//
+// If dst is empty, LTo will resize dst to be r×c. When dst is
+// non-empty, LTo will panic if dst is not r×c. LTo will also panic
+// if the receiver does not contain a successful factorization.
+func (lq *LQ) LTo(dst *Dense) {
+	if !lq.isValid() {
+		panic(badLQ)
+	}
+
+	r, c := lq.lq.Dims()
+	if dst.IsEmpty() {
+		dst.ReuseAs(r, c)
+	} else {
+		r2, c2 := dst.Dims()
+		if r != r2 || c != c2 {
+			panic(ErrShape)
+		}
+	}
+
+	// Disguise the LQ as a lower triangular.
+	t := &TriDense{
+		mat: blas64.Triangular{
+			N:      r,
+			Stride: lq.lq.mat.Stride,
+			Data:   lq.lq.mat.Data,
+			Uplo:   blas.Lower,
+			Diag:   blas.NonUnit,
+		},
+		cap: lq.lq.capCols,
+	}
+	dst.Copy(t)
+
+	if r == c {
+		return
+	}
+	// Zero right of the triangular.
+	for i := 0; i < r; i++ {
+		zero(dst.mat.Data[i*dst.mat.Stride+r : i*dst.mat.Stride+c])
+	}
+}
+
+// QTo extracts the n×n orthonormal matrix Q from an LQ decomposition.
+//
+// If dst is empty, QTo will resize dst to be n×n. When dst is
+// non-empty, QTo will panic if dst is not n×n. QTo will also panic
+// if the receiver does not contain a successful factorization.
+func (lq *LQ) QTo(dst *Dense) {
+	if !lq.isValid() {
+		panic(badLQ)
+	}
+
+	_, n := lq.lq.Dims()
+	if dst.IsEmpty() {
+		dst.ReuseAs(n, n)
+	} else {
+		m2, n2 := dst.Dims()
+		if n != m2 || n != n2 {
+			panic(ErrShape)
+		}
+	}
+	dst.Copy(lq.q)
+}
+
+// SolveTo finds a minimum-norm solution to a system of linear equations defined
+// by the matrices A and b, where A is an m×n matrix represented in its LQ factorized
+// form. If A is singular or near-singular a Condition error is returned.
+// See the documentation for Condition for more information.
+//
+// The minimization problem solved depends on the input parameters.
+//
+//	If trans == false, find the minimum norm solution of A * X = B.
+//	If trans == true, find X such that ||A*X - B||_2 is minimized.
+//
+// The solution matrix, X, is stored in place into dst.
+// SolveTo will panic if the receiver does not contain a factorization.
+func (lq *LQ) SolveTo(dst *Dense, trans bool, b Matrix) error {
+	if !lq.isValid() {
+		panic(badLQ)
+	}
+
+	r, c := lq.lq.Dims()
+	br, bc := b.Dims()
+
+	// The LQ solve algorithm stores the result in-place into the right hand side.
+	// The storage for the answer must be large enough to hold both b and x.
+	// However, this method's receiver must be the size of x. Copy b, and then
+	// copy the result into x at the end.
+	if trans {
+		if c != br {
+			panic(ErrShape)
+		}
+		dst.reuseAsNonZeroed(r, bc)
+	} else {
+		if r != br {
+			panic(ErrShape)
+		}
+		dst.reuseAsNonZeroed(c, bc)
+	}
+	// Do not need to worry about overlap between x and b because w has its own
+	// independent storage.
+	w := getDenseWorkspace(max(r, c), bc, false)
+	w.Copy(b)
+	t := lq.lq.asTriDense(lq.lq.mat.Rows, blas.NonUnit, blas.Lower).mat
+	if trans {
+		work := []float64{0}
+		lapack64.Ormlq(blas.Left, blas.NoTrans, lq.lq.mat, lq.tau, w.mat, work, -1)
+		work = getFloat64s(int(work[0]), false)
+		lapack64.Ormlq(blas.Left, blas.NoTrans, lq.lq.mat, lq.tau, w.mat, work, len(work))
+		putFloat64s(work)
+
+		ok := lapack64.Trtrs(blas.Trans, t, w.mat)
+		if !ok {
+			return Condition(math.Inf(1))
+		}
+	} else {
+		ok := lapack64.Trtrs(blas.NoTrans, t, w.mat)
+		if !ok {
+			return Condition(math.Inf(1))
+		}
+		for i := r; i < c; i++ {
+			zero(w.mat.Data[i*w.mat.Stride : i*w.mat.Stride+bc])
+		}
+		work := []float64{0}
+		lapack64.Ormlq(blas.Left, blas.Trans, lq.lq.mat, lq.tau, w.mat, work, -1)
+		work = getFloat64s(int(work[0]), false)
+		lapack64.Ormlq(blas.Left, blas.Trans, lq.lq.mat, lq.tau, w.mat, work, len(work))
+		putFloat64s(work)
+	}
+	// x was set above to be the correct size for the result.
+	dst.Copy(w)
+	putDenseWorkspace(w)
+	if lq.cond > ConditionTolerance {
+		return Condition(lq.cond)
+	}
+	return nil
+}
+
+// SolveVecTo finds a minimum-norm solution to a system of linear equations.
+// See LQ.SolveTo for the full documentation.
+// SolveVecTo will panic if the receiver does not contain a factorization.
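+//
+// A minimal solve sketch, assuming a is m×n with m <= n and b is a vector of
+// length m (illustrative names):
+//
+//	var lq mat.LQ
+//	lq.Factorize(a)
+//	var x mat.VecDense
+//	if err := lq.SolveVecTo(&x, false, b); err != nil {
+//		// a is singular or near-singular; err reports the condition number
+//	}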
+func (lq *LQ) SolveVecTo(dst *VecDense, trans bool, b Vector) error { + if !lq.isValid() { + panic(badLQ) + } + + r, c := lq.lq.Dims() + if _, bc := b.Dims(); bc != 1 { + panic(ErrShape) + } + + // The Solve implementation is non-trivial, so rather than duplicate the code, + // instead recast the VecDenses as Dense and call the matrix code. + bm := Matrix(b) + if rv, ok := b.(RawVectorer); ok { + bmat := rv.RawVector() + if dst != b { + dst.checkOverlap(bmat) + } + b := VecDense{mat: bmat} + bm = b.asDense() + } + if trans { + dst.reuseAsNonZeroed(r) + } else { + dst.reuseAsNonZeroed(c) + } + return lq.SolveTo(dst.asDense(), trans, bm) +} diff --git a/vendor/gonum.org/v1/gonum/mat/lu.go b/vendor/gonum.org/v1/gonum/mat/lu.go new file mode 100644 index 0000000000..b530ada7e5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/lu.go @@ -0,0 +1,487 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/lapack" + "gonum.org/v1/gonum/lapack/lapack64" +) + +const ( + badSliceLength = "mat: improper slice length" + badLU = "mat: invalid LU factorization" +) + +// LU is a square n×n matrix represented by its LU factorization with partial +// pivoting. +// +// The factorization has the form +// +// A = P * L * U +// +// where P is a permutation matrix, L is lower triangular with unit diagonal +// elements, and U is upper triangular. +// +// Note that this matrix representation is useful for certain operations, in +// particular for solving linear systems of equations. It is very inefficient at +// other operations, in particular At is slow. +type LU struct { + lu *Dense + swaps []int + piv []int + cond float64 + ok bool // Whether A is nonsingular +} + +var _ Matrix = (*LU)(nil) + +// Dims returns the dimensions of the matrix A. +func (lu *LU) Dims() (r, c int) { + if lu.lu == nil { + return 0, 0 + } + return lu.lu.Dims() +} + +// At returns the element of A at row i, column j. +func (lu *LU) At(i, j int) float64 { + n, _ := lu.Dims() + if uint(i) >= uint(n) { + panic(ErrRowAccess) + } + if uint(j) >= uint(n) { + panic(ErrColAccess) + } + + i = lu.piv[i] + var val float64 + for k := 0; k < min(i, j+1); k++ { + val += lu.lu.at(i, k) * lu.lu.at(k, j) + } + if i <= j { + val += lu.lu.at(i, j) + } + return val +} + +// T performs an implicit transpose by returning the receiver inside a +// Transpose. +func (lu *LU) T() Matrix { + return Transpose{lu} +} + +// updateCond updates the stored condition number of the matrix. anorm is the +// norm of the original matrix. If anorm is negative it will be estimated. +func (lu *LU) updateCond(anorm float64, norm lapack.MatrixNorm) { + n := lu.lu.mat.Cols + work := getFloat64s(4*n, false) + defer putFloat64s(work) + iwork := getInts(n, false) + defer putInts(iwork) + if anorm < 0 { + // This is an approximation. By the definition of a norm, + // |AB| <= |A| |B|. + // Since A = L*U, we get for the condition number κ that + // κ(A) := |A| |A^-1| = |L*U| |A^-1| <= |L| |U| |A^-1|, + // so this will overestimate the condition number somewhat. + // The norm of the original factorized matrix cannot be stored + // because of update possibilities. 
+ u := lu.lu.asTriDense(n, blas.NonUnit, blas.Upper) + l := lu.lu.asTriDense(n, blas.Unit, blas.Lower) + unorm := lapack64.Lantr(norm, u.mat, work) + lnorm := lapack64.Lantr(norm, l.mat, work) + anorm = unorm * lnorm + } + v := lapack64.Gecon(norm, lu.lu.mat, anorm, work, iwork) + lu.cond = 1 / v +} + +// Factorize computes the LU factorization of the square matrix A and stores the +// result in the receiver. The LU decomposition will complete regardless of the +// singularity of a. +// +// The L and U matrix factors can be extracted from the factorization using the +// LTo and UTo methods. The matrix P can be extracted as a row permutation using +// the RowPivots method and applied using Dense.PermuteRows. +func (lu *LU) Factorize(a Matrix) { + lu.factorize(a, CondNorm) +} + +func (lu *LU) factorize(a Matrix, norm lapack.MatrixNorm) { + m, n := a.Dims() + if m != n { + panic(ErrSquare) + } + if lu.lu == nil { + lu.lu = NewDense(n, n, nil) + } else { + lu.lu.Reset() + lu.lu.reuseAsNonZeroed(n, n) + } + lu.lu.Copy(a) + lu.swaps = useInt(lu.swaps, n) + lu.piv = useInt(lu.piv, n) + work := getFloat64s(n, false) + anorm := lapack64.Lange(norm, lu.lu.mat, work) + putFloat64s(work) + lu.ok = lapack64.Getrf(lu.lu.mat, lu.swaps) + lu.updatePivots(lu.swaps) + lu.updateCond(anorm, norm) +} + +func (lu *LU) updatePivots(swaps []int) { + // Replay the sequence of row swaps in order to find the row permutation. + for i := range lu.piv { + lu.piv[i] = i + } + n, _ := lu.Dims() + for i := n - 1; i >= 0; i-- { + v := swaps[i] + lu.piv[i], lu.piv[v] = lu.piv[v], lu.piv[i] + } +} + +// isValid returns whether the receiver contains a factorization. +func (lu *LU) isValid() bool { + return lu.lu != nil && !lu.lu.IsEmpty() +} + +// Cond returns the condition number for the factorized matrix. +// Cond will panic if the receiver does not contain a factorization. +func (lu *LU) Cond() float64 { + if !lu.isValid() { + panic(badLU) + } + return lu.cond +} + +// Reset resets the factorization so that it can be reused as the receiver of a +// dimensionally restricted operation. +func (lu *LU) Reset() { + if lu.lu != nil { + lu.lu.Reset() + } + lu.swaps = lu.swaps[:0] + lu.piv = lu.piv[:0] +} + +func (lu *LU) isZero() bool { + return len(lu.swaps) == 0 +} + +// Det returns the determinant of the matrix that has been factorized. In many +// expressions, using LogDet will be more numerically stable. +// Det will panic if the receiver does not contain a factorization. +func (lu *LU) Det() float64 { + if !lu.ok { + return 0 + } + det, sign := lu.LogDet() + return math.Exp(det) * sign +} + +// LogDet returns the log of the determinant and the sign of the determinant +// for the matrix that has been factorized. Numerical stability in product and +// division expressions is generally improved by working in log space. +// LogDet will panic if the receiver does not contain a factorization. +func (lu *LU) LogDet() (det float64, sign float64) { + if !lu.isValid() { + panic(badLU) + } + + _, n := lu.lu.Dims() + logDiag := getFloat64s(n, false) + defer putFloat64s(logDiag) + sign = 1.0 + for i := 0; i < n; i++ { + v := lu.lu.at(i, i) + if v < 0 { + sign *= -1 + } + if lu.swaps[i] != i { + sign *= -1 + } + logDiag[i] = math.Log(math.Abs(v)) + } + return floats.Sum(logDiag), sign +} + +// RowPivots returns the row permutation that represents the permutation matrix +// P from the LU factorization +// +// A = P * L * U. +// +// If dst is nil, a new slice is allocated and returned. 
If dst is not nil and +// the length of dst does not equal the size of the factorized matrix, RowPivots +// will panic. RowPivots will panic if the receiver does not contain a +// factorization. +func (lu *LU) RowPivots(dst []int) []int { + if !lu.isValid() { + panic(badLU) + } + _, n := lu.lu.Dims() + if dst == nil { + dst = make([]int, n) + } + if len(dst) != n { + panic(badSliceLength) + } + copy(dst, lu.piv) + return dst +} + +// Pivot returns the row pivots of the receiver. +// +// Deprecated: Use RowPivots instead. +func (lu *LU) Pivot(dst []int) []int { + return lu.RowPivots(dst) +} + +// RankOne updates an LU factorization as if a rank-one update had been applied to +// the original matrix A, storing the result into the receiver. That is, if in +// the original LU decomposition P * L * U = A, in the updated decomposition +// P * L' * U' = A + alpha * x * yᵀ. +// RankOne will panic if orig does not contain a factorization. +func (lu *LU) RankOne(orig *LU, alpha float64, x, y Vector) { + if !orig.isValid() { + panic(badLU) + } + + // RankOne uses algorithm a1 on page 28 of "Multiple-Rank Updates to Matrix + // Factorizations for Nonlinear Analysis and Circuit Design" by Linzhong Deng. + // http://web.stanford.edu/group/SOL/dissertations/Linzhong-Deng-thesis.pdf + _, n := orig.lu.Dims() + if r, c := x.Dims(); r != n || c != 1 { + panic(ErrShape) + } + if r, c := y.Dims(); r != n || c != 1 { + panic(ErrShape) + } + if orig != lu { + if lu.isZero() { + lu.swaps = useInt(lu.swaps, n) + lu.piv = useInt(lu.piv, n) + if lu.lu == nil { + lu.lu = NewDense(n, n, nil) + } else { + lu.lu.reuseAsNonZeroed(n, n) + } + } else if len(lu.swaps) != n { + panic(ErrShape) + } + copy(lu.swaps, orig.swaps) + lu.updatePivots(lu.swaps) + lu.lu.Copy(orig.lu) + } + + xs := getFloat64s(n, false) + defer putFloat64s(xs) + ys := getFloat64s(n, false) + defer putFloat64s(ys) + for i := 0; i < n; i++ { + xs[i] = x.AtVec(i) + ys[i] = y.AtVec(i) + } + + // Adjust for the pivoting in the LU factorization + for i, v := range lu.swaps { + xs[i], xs[v] = xs[v], xs[i] + } + + lum := lu.lu.mat + omega := alpha + for j := 0; j < n; j++ { + ujj := lum.Data[j*lum.Stride+j] + ys[j] /= ujj + theta := 1 + xs[j]*ys[j]*omega + beta := omega * ys[j] / theta + gamma := omega * xs[j] + omega -= beta * gamma + lum.Data[j*lum.Stride+j] *= theta + for i := j + 1; i < n; i++ { + xs[i] -= lum.Data[i*lum.Stride+j] * xs[j] + tmp := ys[i] + ys[i] -= lum.Data[j*lum.Stride+i] * ys[j] + lum.Data[i*lum.Stride+j] += beta * xs[i] + lum.Data[j*lum.Stride+i] += gamma * tmp + } + } + lu.updateCond(-1, CondNorm) +} + +// LTo extracts the lower triangular matrix from an LU factorization. +// +// If dst is empty, LTo will resize dst to be a lower-triangular n×n matrix. +// When dst is non-empty, LTo will panic if dst is not n×n or not Lower. +// LTo will also panic if the receiver does not contain a successful +// factorization. +func (lu *LU) LTo(dst *TriDense) *TriDense { + if !lu.isValid() { + panic(badLU) + } + + _, n := lu.lu.Dims() + if dst.IsEmpty() { + dst.ReuseAsTri(n, Lower) + } else { + n2, kind := dst.Triangle() + if n != n2 { + panic(ErrShape) + } + if kind != Lower { + panic(ErrTriangle) + } + } + // Extract the lower triangular elements. + for i := 1; i < n; i++ { + copy(dst.mat.Data[i*dst.mat.Stride:i*dst.mat.Stride+i], lu.lu.mat.Data[i*lu.lu.mat.Stride:i*lu.lu.mat.Stride+i]) + } + // Set ones on the diagonal. 
+ for i := 0; i < n; i++ { + dst.mat.Data[i*dst.mat.Stride+i] = 1 + } + return dst +} + +// UTo extracts the upper triangular matrix from an LU factorization. +// +// If dst is empty, UTo will resize dst to be an upper-triangular n×n matrix. +// When dst is non-empty, UTo will panic if dst is not n×n or not Upper. +// UTo will also panic if the receiver does not contain a successful +// factorization. +func (lu *LU) UTo(dst *TriDense) { + if !lu.isValid() { + panic(badLU) + } + + _, n := lu.lu.Dims() + if dst.IsEmpty() { + dst.ReuseAsTri(n, Upper) + } else { + n2, kind := dst.Triangle() + if n != n2 { + panic(ErrShape) + } + if kind != Upper { + panic(ErrTriangle) + } + } + // Extract the upper triangular elements. + for i := 0; i < n; i++ { + copy(dst.mat.Data[i*dst.mat.Stride+i:i*dst.mat.Stride+n], lu.lu.mat.Data[i*lu.lu.mat.Stride+i:i*lu.lu.mat.Stride+n]) + } +} + +// SolveTo solves a system of linear equations +// +// A * X = B if trans == false +// Aᵀ * X = B if trans == true +// +// using the LU factorization of A stored in the receiver. The solution matrix X +// is stored into dst. +// +// If A is singular or near-singular a Condition error is returned. See the +// documentation for Condition for more information. SolveTo will panic if the +// receiver does not contain a factorization. +func (lu *LU) SolveTo(dst *Dense, trans bool, b Matrix) error { + if !lu.isValid() { + panic(badLU) + } + + _, n := lu.lu.Dims() + br, bc := b.Dims() + if br != n { + panic(ErrShape) + } + + if !lu.ok { + return Condition(math.Inf(1)) + } + + dst.reuseAsNonZeroed(n, bc) + bU, _ := untranspose(b) + if dst == bU { + var restore func() + dst, restore = dst.isolatedWorkspace(bU) + defer restore() + } else if rm, ok := bU.(RawMatrixer); ok { + dst.checkOverlap(rm.RawMatrix()) + } + + dst.Copy(b) + t := blas.NoTrans + if trans { + t = blas.Trans + } + lapack64.Getrs(t, lu.lu.mat, dst.mat, lu.swaps) + if lu.cond > ConditionTolerance { + return Condition(lu.cond) + } + return nil +} + +// SolveVecTo solves a system of linear equations +// +// A * x = b if trans == false +// Aᵀ * x = b if trans == true +// +// using the LU factorization of A stored in the receiver. The solution matrix x +// is stored into dst. +// +// If A is singular or near-singular a Condition error is returned. See the +// documentation for Condition for more information. SolveVecTo will panic if the +// receiver does not contain a factorization. 
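A hedged sketch of how the LU pieces above combine in practice (not vendored code; values are arbitrary):

	package main

	import (
		"fmt"

		"gonum.org/v1/gonum/mat"
	)

	func main() {
		a := mat.NewDense(2, 2, []float64{4, 2, 1, 3})

		// Factorize once; the LU can then solve several right-hand
		// sides and also yields Det, LogDet and Cond cheaply.
		var lu mat.LU
		lu.Factorize(a)
		fmt.Println(lu.Det()) // 10: 4*3 - 2*1

		var x mat.VecDense
		b := mat.NewVecDense(2, []float64{1, 2})
		if err := lu.SolveVecTo(&x, false, b); err != nil {
			fmt.Println(err) // Condition error for a near-singular A
			return
		}
		fmt.Println(mat.Formatted(&x))
	}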
+func (lu *LU) SolveVecTo(dst *VecDense, trans bool, b Vector) error { + if !lu.isValid() { + panic(badLU) + } + + _, n := lu.lu.Dims() + if br, bc := b.Dims(); br != n || bc != 1 { + panic(ErrShape) + } + + switch rv := b.(type) { + default: + dst.reuseAsNonZeroed(n) + return lu.SolveTo(dst.asDense(), trans, b) + case RawVectorer: + if dst != b { + dst.checkOverlap(rv.RawVector()) + } + + if !lu.ok { + return Condition(math.Inf(1)) + } + + dst.reuseAsNonZeroed(n) + var restore func() + if dst == b { + dst, restore = dst.isolatedWorkspace(b) + defer restore() + } + dst.CopyVec(b) + vMat := blas64.General{ + Rows: n, + Cols: 1, + Stride: dst.mat.Inc, + Data: dst.mat.Data, + } + t := blas.NoTrans + if trans { + t = blas.Trans + } + lapack64.Getrs(t, lu.lu.mat, vMat, lu.swaps) + if lu.cond > ConditionTolerance { + return Condition(lu.cond) + } + return nil + } +} diff --git a/vendor/gonum.org/v1/gonum/mat/matrix.go b/vendor/gonum.org/v1/gonum/mat/matrix.go new file mode 100644 index 0000000000..2d67bbe081 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/matrix.go @@ -0,0 +1,1000 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/floats/scalar" + "gonum.org/v1/gonum/lapack" +) + +// Matrix is the basic matrix interface type. +type Matrix interface { + // Dims returns the dimensions of a Matrix. + Dims() (r, c int) + + // At returns the value of a matrix element at row i, column j. + // It will panic if i or j are out of bounds for the matrix. + At(i, j int) float64 + + // T returns the transpose of the Matrix. Whether T returns a copy of the + // underlying data is implementation dependent. + // This method may be implemented using the Transpose type, which + // provides an implicit matrix transpose. + T() Matrix +} + +// allMatrix represents the extra set of methods that all mat Matrix types +// should satisfy. This is used to enforce compile-time consistency between the +// Dense types, especially helpful when adding new features. +type allMatrix interface { + Reseter + IsEmpty() bool + Zero() +} + +// denseMatrix represents the extra set of methods that all Dense Matrix types +// should satisfy. This is used to enforce compile-time consistency between the +// Dense types, especially helpful when adding new features. +type denseMatrix interface { + DiagView() Diagonal + Tracer + Normer +} + +var ( + _ Matrix = Transpose{} + _ Untransposer = Transpose{} +) + +// Transpose is a type for performing an implicit matrix transpose. It implements +// the Matrix interface, returning values from the transpose of the matrix within. +type Transpose struct { + Matrix Matrix +} + +// At returns the value of the element at row i and column j of the transposed +// matrix, that is, row j and column i of the Matrix field. +func (t Transpose) At(i, j int) float64 { + return t.Matrix.At(j, i) +} + +// Dims returns the dimensions of the transposed matrix. The number of rows returned +// is the number of columns in the Matrix field, and the number of columns is +// the number of rows in the Matrix field. +func (t Transpose) Dims() (r, c int) { + c, r = t.Matrix.Dims() + return r, c +} + +// T performs an implicit transpose by returning the Matrix field. +func (t Transpose) T() Matrix { + return t.Matrix +} + +// Untranspose returns the Matrix field. 
+func (t Transpose) Untranspose() Matrix {
+	return t.Matrix
+}
+
+// Untransposer is a type that can undo an implicit transpose.
+type Untransposer interface {
+	// Note: This interface is needed to unify all of the Transpose types. In
+	// the mat methods, we need to test if the Matrix has been implicitly
+	// transposed. If this is checked by testing for the specific Transpose type
+	// then the behavior will be different if the user uses T() or TTri() for a
+	// triangular matrix.
+
+	// Untranspose returns the underlying Matrix stored for the implicit transpose.
+	Untranspose() Matrix
+}
+
+// UntransposeBander is a type that can undo an implicit band transpose.
+type UntransposeBander interface {
+	// Untranspose returns the underlying Banded stored for the implicit transpose.
+	UntransposeBand() Banded
+}
+
+// UntransposeTrier is a type that can undo an implicit triangular transpose.
+type UntransposeTrier interface {
+	// Untranspose returns the underlying Triangular stored for the implicit transpose.
+	UntransposeTri() Triangular
+}
+
+// UntransposeTriBander is a type that can undo an implicit triangular banded
+// transpose.
+type UntransposeTriBander interface {
+	// Untranspose returns the underlying Triangular stored for the implicit transpose.
+	UntransposeTriBand() TriBanded
+}
+
+// Mutable is a matrix interface type that allows elements to be altered.
+type Mutable interface {
+	// Set alters the matrix element at row i, column j to v.
+	// It will panic if i or j are out of bounds for the matrix.
+	Set(i, j int, v float64)
+
+	Matrix
+}
+
+// A RowViewer can return a Vector reflecting a row that is backed by the matrix
+// data. The Vector returned will have length equal to the number of columns.
+type RowViewer interface {
+	RowView(i int) Vector
+}
+
+// A RawRowViewer can return a slice of float64 reflecting a row that is backed by the matrix
+// data.
+type RawRowViewer interface {
+	RawRowView(i int) []float64
+}
+
+// A ColViewer can return a Vector reflecting a column that is backed by the matrix
+// data. The Vector returned will have length equal to the number of rows.
+type ColViewer interface {
+	ColView(j int) Vector
+}
+
+// A RawColViewer can return a slice of float64 reflecting a column that is backed by the matrix
+// data.
+type RawColViewer interface {
+	RawColView(j int) []float64
+}
+
+// A ClonerFrom can make a copy of a into the receiver, overwriting the previous value of the
+// receiver. The clone operation does not make any restriction on shape and will not cause
+// shadowing.
+type ClonerFrom interface {
+	CloneFrom(a Matrix)
+}
+
+// A Reseter can reset the matrix so that it can be reused as the receiver of a dimensionally
+// restricted operation. This is commonly used when the matrix is being used as a workspace
+// or temporary matrix.
+//
+// If the matrix is a view, using Reset may result in data corruption in elements outside
+// the view. Similarly, if the matrix shares backing data with another variable, using
+// Reset may lead to unexpected changes in data values.
+type Reseter interface {
+	Reset()
+}
+
+// A Copier can make a copy of elements of a into the receiver. The submatrix copied
+// starts at row and column 0 and has dimensions equal to the minimum dimensions of
+// the two matrices. The number of rows and columns copied is returned.
+// Copy will copy from a source that aliases the receiver unless the source is transposed; +// an aliasing transpose copy will panic with the exception for a special case when +// the source data has a unitary increment or stride. +type Copier interface { + Copy(a Matrix) (r, c int) +} + +// A Grower can grow the size of the represented matrix by the given number of rows and columns. +// Growing beyond the size given by the Caps method will result in the allocation of a new +// matrix and copying of the elements. If Grow is called with negative increments it will +// panic with ErrIndexOutOfRange. +type Grower interface { + Caps() (r, c int) + Grow(r, c int) Matrix +} + +// A RawMatrixSetter can set the underlying blas64.General used by the receiver. There is no restriction +// on the shape of the receiver. Changes to the receiver's elements will be reflected in the blas64.General.Data. +type RawMatrixSetter interface { + SetRawMatrix(a blas64.General) +} + +// A RawMatrixer can return a blas64.General representation of the receiver. Changes to the blas64.General.Data +// slice will be reflected in the original matrix, changes to the Rows, Cols and Stride fields will not. +type RawMatrixer interface { + RawMatrix() blas64.General +} + +// A RawVectorer can return a blas64.Vector representation of the receiver. Changes to the blas64.Vector.Data +// slice will be reflected in the original matrix, changes to the Inc field will not. +type RawVectorer interface { + RawVector() blas64.Vector +} + +// A NonZeroDoer can call a function for each non-zero element of the receiver. +// The parameters of the function are the element indices and its value. +type NonZeroDoer interface { + DoNonZero(func(i, j int, v float64)) +} + +// A RowNonZeroDoer can call a function for each non-zero element of a row of the receiver. +// The parameters of the function are the element indices and its value. +type RowNonZeroDoer interface { + DoRowNonZero(i int, fn func(i, j int, v float64)) +} + +// A ColNonZeroDoer can call a function for each non-zero element of a column of the receiver. +// The parameters of the function are the element indices and its value. +type ColNonZeroDoer interface { + DoColNonZero(j int, fn func(i, j int, v float64)) +} + +// A SolveToer can solve a linear system A⋅X = B or Aᵀ⋅X = B where A is a matrix +// represented by the receiver and B is a given matrix, storing the result into +// dst. +// +// If dst is empty, SolveTo will resize it to the correct size, otherwise it +// must have the correct size. Individual implementations may impose other +// restrictions on the input parameters, for example that A is a square matrix. +type SolveToer interface { + SolveTo(dst *Dense, trans bool, b Matrix) error +} + +// untranspose untransposes a matrix if applicable. If a is an Untransposer, then +// untranspose returns the underlying matrix and true. If it is not, then it returns +// the input matrix and false. +func untranspose(a Matrix) (Matrix, bool) { + if ut, ok := a.(Untransposer); ok { + return ut.Untranspose(), true + } + return a, false +} + +// untransposeExtract returns an untransposed matrix in a built-in matrix type. +// +// The untransposed matrix is returned unaltered if it is a built-in matrix type. +// Otherwise, if it implements a Raw method, an appropriate built-in type value +// is returned holding the raw matrix value of the input. If neither of these +// is possible, the untransposed matrix is returned. 
+func untransposeExtract(a Matrix) (Matrix, bool) {
+	ut, trans := untranspose(a)
+	switch m := ut.(type) {
+	case *DiagDense, *SymBandDense, *TriBandDense, *BandDense, *TriDense, *SymDense, *Dense, *VecDense, *Tridiag:
+		return m, trans
+	// TODO(btracey): Add here if we ever have an equivalent of RawDiagDense.
+	case RawSymBander:
+		rsb := m.RawSymBand()
+		if rsb.Uplo != blas.Upper {
+			return ut, trans
+		}
+		var sb SymBandDense
+		sb.SetRawSymBand(rsb)
+		return &sb, trans
+	case RawTriBander:
+		rtb := m.RawTriBand()
+		if rtb.Diag == blas.Unit {
+			return ut, trans
+		}
+		var tb TriBandDense
+		tb.SetRawTriBand(rtb)
+		return &tb, trans
+	case RawBander:
+		var b BandDense
+		b.SetRawBand(m.RawBand())
+		return &b, trans
+	case RawTriangular:
+		rt := m.RawTriangular()
+		if rt.Diag == blas.Unit {
+			return ut, trans
+		}
+		var t TriDense
+		t.SetRawTriangular(rt)
+		return &t, trans
+	case RawSymmetricer:
+		rs := m.RawSymmetric()
+		if rs.Uplo != blas.Upper {
+			return ut, trans
+		}
+		var s SymDense
+		s.SetRawSymmetric(rs)
+		return &s, trans
+	case RawMatrixer:
+		var d Dense
+		d.SetRawMatrix(m.RawMatrix())
+		return &d, trans
+	case RawVectorer:
+		var v VecDense
+		v.SetRawVector(m.RawVector())
+		return &v, trans
+	case RawTridiagonaler:
+		var d Tridiag
+		d.SetRawTridiagonal(m.RawTridiagonal())
+		return &d, trans
+	default:
+		return ut, trans
+	}
+}
+
+// TODO(btracey): Consider adding CopyCol/CopyRow if the behavior seems useful.
+// TODO(btracey): Add in fast paths to Row/Col for the other concrete types
+// (TriDense, etc.) as well as relevant interfaces (RowColer, RawRowViewer, etc.)
+
+// Col copies the elements in the jth column of the matrix into the slice dst.
+// The length of the provided slice must equal the number of rows, unless the
+// slice is nil in which case a new slice is first allocated.
+func Col(dst []float64, j int, a Matrix) []float64 {
+	r, c := a.Dims()
+	if j < 0 || j >= c {
+		panic(ErrColAccess)
+	}
+	if dst == nil {
+		dst = make([]float64, r)
+	} else {
+		if len(dst) != r {
+			panic(ErrColLength)
+		}
+	}
+	aU, aTrans := untranspose(a)
+	if rm, ok := aU.(RawMatrixer); ok {
+		m := rm.RawMatrix()
+		if aTrans {
+			copy(dst, m.Data[j*m.Stride:j*m.Stride+m.Cols])
+			return dst
+		}
+		blas64.Copy(blas64.Vector{N: r, Inc: m.Stride, Data: m.Data[j:]},
+			blas64.Vector{N: r, Inc: 1, Data: dst},
+		)
+		return dst
+	}
+	for i := 0; i < r; i++ {
+		dst[i] = a.At(i, j)
+	}
+	return dst
+}
+
+// Row copies the elements in the ith row of the matrix into the slice dst.
+// The length of the provided slice must equal the number of columns, unless the
+// slice is nil in which case a new slice is first allocated.
+func Row(dst []float64, i int, a Matrix) []float64 {
+	r, c := a.Dims()
+	if i < 0 || i >= r {
+		panic(ErrRowAccess)
+	}
+	if dst == nil {
+		dst = make([]float64, c)
+	} else {
+		if len(dst) != c {
+			panic(ErrRowLength)
+		}
+	}
+	aU, aTrans := untranspose(a)
+	if rm, ok := aU.(RawMatrixer); ok {
+		m := rm.RawMatrix()
+		if aTrans {
+			blas64.Copy(blas64.Vector{N: c, Inc: m.Stride, Data: m.Data[i:]},
+				blas64.Vector{N: c, Inc: 1, Data: dst},
+			)
+			return dst
+		}
+		copy(dst, m.Data[i*m.Stride:i*m.Stride+m.Cols])
+		return dst
+	}
+	for j := 0; j < c; j++ {
+		dst[j] = a.At(i, j)
+	}
+	return dst
+}
+
+// Cond returns the condition number of the given matrix under the given norm.
+// The condition number must be based on the 1-norm, 2-norm or ∞-norm.
+// Cond will panic with ErrZeroLength if the matrix has zero size.
+//
+// BUG(btracey): The computation of the 1-norm and ∞-norm for non-square matrices
+// is inaccurate, although it is typically the right order of magnitude. See
+// https://github.com/xianyi/OpenBLAS/issues/636. While the value returned will
+// change with the resolution of this bug, the result from Cond will match the
+// condition number used internally.
+func Cond(a Matrix, norm float64) float64 {
+	m, n := a.Dims()
+	if m == 0 || n == 0 {
+		panic(ErrZeroLength)
+	}
+	var lnorm lapack.MatrixNorm
+	switch norm {
+	default:
+		panic("mat: bad norm value")
+	case 1:
+		lnorm = lapack.MaxColumnSum
+	case 2:
+		var svd SVD
+		ok := svd.Factorize(a, SVDNone)
+		if !ok {
+			return math.Inf(1)
+		}
+		return svd.Cond()
+	case math.Inf(1):
+		lnorm = lapack.MaxRowSum
+	}
+
+	if m == n {
+		// Use the LU decomposition to compute the condition number.
+		var lu LU
+		lu.factorize(a, lnorm)
+		return lu.Cond()
+	}
+	if m > n {
+		// Use the QR factorization to compute the condition number.
+		var qr QR
+		qr.factorize(a, lnorm)
+		return qr.Cond()
+	}
+	// Use the LQ factorization to compute the condition number.
+	var lq LQ
+	lq.factorize(a, lnorm)
+	return lq.Cond()
+}
+
+// Det returns the determinant of the square matrix a. In many expressions using
+// LogDet will be more numerically stable.
+//
+// Det panics with ErrSquare if a is not square and with ErrZeroLength if a has
+// zero size.
+func Det(a Matrix) float64 {
+	det, sign := LogDet(a)
+	return math.Exp(det) * sign
+}
+
+// Dot returns the sum of the element-wise product of a and b.
+//
+// Dot panics with ErrShape if the vector sizes are unequal and with
+// ErrZeroLength if the sizes are zero.
+func Dot(a, b Vector) float64 {
+	la := a.Len()
+	lb := b.Len()
+	if la != lb {
+		panic(ErrShape)
+	}
+	if la == 0 {
+		panic(ErrZeroLength)
+	}
+	if arv, ok := a.(RawVectorer); ok {
+		if brv, ok := b.(RawVectorer); ok {
+			return blas64.Dot(arv.RawVector(), brv.RawVector())
+		}
+	}
+	var sum float64
+	for i := 0; i < la; i++ {
+		sum += a.At(i, 0) * b.At(i, 0)
+	}
+	return sum
+}
+
+// Equal returns whether the matrices a and b have the same size
+// and are element-wise equal.
+func Equal(a, b Matrix) bool {
+	ar, ac := a.Dims()
+	br, bc := b.Dims()
+	if ar != br || ac != bc {
+		return false
+	}
+	aU, aTrans := untranspose(a)
+	bU, bTrans := untranspose(b)
+	if rma, ok := aU.(RawMatrixer); ok {
+		if rmb, ok := bU.(RawMatrixer); ok {
+			ra := rma.RawMatrix()
+			rb := rmb.RawMatrix()
+			if aTrans == bTrans {
+				for i := 0; i < ra.Rows; i++ {
+					for j := 0; j < ra.Cols; j++ {
+						if ra.Data[i*ra.Stride+j] != rb.Data[i*rb.Stride+j] {
+							return false
+						}
+					}
+				}
+				return true
+			}
+			for i := 0; i < ra.Rows; i++ {
+				for j := 0; j < ra.Cols; j++ {
+					if ra.Data[i*ra.Stride+j] != rb.Data[j*rb.Stride+i] {
+						return false
+					}
+				}
+			}
+			return true
+		}
+	}
+	if rma, ok := aU.(RawSymmetricer); ok {
+		if rmb, ok := bU.(RawSymmetricer); ok {
+			ra := rma.RawSymmetric()
+			rb := rmb.RawSymmetric()
+			// Symmetric matrices are always upper and equal to their transpose.
+			for i := 0; i < ra.N; i++ {
+				for j := i; j < ra.N; j++ {
+					if ra.Data[i*ra.Stride+j] != rb.Data[i*rb.Stride+j] {
+						return false
+					}
+				}
+			}
+			return true
+		}
+	}
+	if ra, ok := aU.(*VecDense); ok {
+		if rb, ok := bU.(*VecDense); ok {
+			// If the raw vectors are the same length they must either both be
+			// transposed or both not transposed (or have length 1).
+			for i := 0; i < ra.mat.N; i++ {
+				if ra.mat.Data[i*ra.mat.Inc] != rb.mat.Data[i*rb.mat.Inc] {
+					return false
+				}
+			}
+			return true
+		}
+	}
+	for i := 0; i < ar; i++ {
+		for j := 0; j < ac; j++ {
+			if a.At(i, j) != b.At(i, j) {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+// EqualApprox returns whether the matrices a and b have the same size and contain all equal
+// elements with tolerance for element-wise equality specified by epsilon. Matrices
+// with non-equal shapes are not equal.
+func EqualApprox(a, b Matrix, epsilon float64) bool {
+	ar, ac := a.Dims()
+	br, bc := b.Dims()
+	if ar != br || ac != bc {
+		return false
+	}
+	aU, aTrans := untranspose(a)
+	bU, bTrans := untranspose(b)
+	if rma, ok := aU.(RawMatrixer); ok {
+		if rmb, ok := bU.(RawMatrixer); ok {
+			ra := rma.RawMatrix()
+			rb := rmb.RawMatrix()
+			if aTrans == bTrans {
+				for i := 0; i < ra.Rows; i++ {
+					for j := 0; j < ra.Cols; j++ {
+						if !scalar.EqualWithinAbsOrRel(ra.Data[i*ra.Stride+j], rb.Data[i*rb.Stride+j], epsilon, epsilon) {
+							return false
+						}
+					}
+				}
+				return true
+			}
+			for i := 0; i < ra.Rows; i++ {
+				for j := 0; j < ra.Cols; j++ {
+					if !scalar.EqualWithinAbsOrRel(ra.Data[i*ra.Stride+j], rb.Data[j*rb.Stride+i], epsilon, epsilon) {
+						return false
+					}
+				}
+			}
+			return true
+		}
+	}
+	if rma, ok := aU.(RawSymmetricer); ok {
+		if rmb, ok := bU.(RawSymmetricer); ok {
+			ra := rma.RawSymmetric()
+			rb := rmb.RawSymmetric()
+			// Symmetric matrices are always upper and equal to their transpose.
+			for i := 0; i < ra.N; i++ {
+				for j := i; j < ra.N; j++ {
+					if !scalar.EqualWithinAbsOrRel(ra.Data[i*ra.Stride+j], rb.Data[i*rb.Stride+j], epsilon, epsilon) {
+						return false
+					}
+				}
+			}
+			return true
+		}
+	}
+	if ra, ok := aU.(*VecDense); ok {
+		if rb, ok := bU.(*VecDense); ok {
+			// If the raw vectors are the same length they must either both be
+			// transposed or both not transposed (or have length 1).
+			for i := 0; i < ra.mat.N; i++ {
+				if !scalar.EqualWithinAbsOrRel(ra.mat.Data[i*ra.mat.Inc], rb.mat.Data[i*rb.mat.Inc], epsilon, epsilon) {
+					return false
+				}
+			}
+			return true
+		}
+	}
+	for i := 0; i < ar; i++ {
+		for j := 0; j < ac; j++ {
+			if !scalar.EqualWithinAbsOrRel(a.At(i, j), b.At(i, j), epsilon, epsilon) {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+// LogDet returns the log of the determinant and the sign of the determinant
+// for the matrix a. Numerical stability in product and
+// division expressions is generally improved by working in log space.
+//
+// LogDet panics with ErrSquare if a is not square and with ErrZeroLength if a
+// has zero size.
+func LogDet(a Matrix) (det float64, sign float64) {
+	// TODO(btracey): Add specialized routines for TriDense, etc.
+	var lu LU
+	lu.Factorize(a)
+	return lu.LogDet()
+}
+
+// Max returns the largest element value of the matrix A.
+//
+// Max will panic with ErrZeroLength if the matrix has zero size.
+func Max(a Matrix) float64 {
+	r, c := a.Dims()
+	if r == 0 || c == 0 {
+		panic(ErrZeroLength)
+	}
+	// Max(A) = Max(Aᵀ)
+	aU, _ := untranspose(a)
+	switch m := aU.(type) {
+	case RawMatrixer:
+		rm := m.RawMatrix()
+		max := math.Inf(-1)
+		for i := 0; i < rm.Rows; i++ {
+			for _, v := range rm.Data[i*rm.Stride : i*rm.Stride+rm.Cols] {
+				if v > max {
+					max = v
+				}
+			}
+		}
+		return max
+	case RawTriangular:
+		rm := m.RawTriangular()
+		// The max of a triangular is at least 0 unless the size is 1.
+		if rm.N == 1 {
+			return rm.Data[0]
+		}
+		max := 0.0
+		if rm.Uplo == blas.Upper {
+			for i := 0; i < rm.N; i++ {
+				for _, v := range rm.Data[i*rm.Stride+i : i*rm.Stride+rm.N] {
+					if v > max {
+						max = v
+					}
+				}
+			}
+			return max
+		}
+		for i := 0; i < rm.N; i++ {
+			for _, v := range rm.Data[i*rm.Stride : i*rm.Stride+i+1] {
+				if v > max {
+					max = v
+				}
+			}
+		}
+		return max
+	case RawSymmetricer:
+		rm := m.RawSymmetric()
+		if rm.Uplo != blas.Upper {
+			panic(badSymTriangle)
+		}
+		max := math.Inf(-1)
+		for i := 0; i < rm.N; i++ {
+			for _, v := range rm.Data[i*rm.Stride+i : i*rm.Stride+rm.N] {
+				if v > max {
+					max = v
+				}
+			}
+		}
+		return max
+	default:
+		r, c := aU.Dims()
+		max := math.Inf(-1)
+		for i := 0; i < r; i++ {
+			for j := 0; j < c; j++ {
+				v := aU.At(i, j)
+				if v > max {
+					max = v
+				}
+			}
+		}
+		return max
+	}
+}
+
+// Min returns the smallest element value of the matrix A.
+//
+// Min will panic with ErrZeroLength if the matrix has zero size.
+func Min(a Matrix) float64 {
+	r, c := a.Dims()
+	if r == 0 || c == 0 {
+		panic(ErrZeroLength)
+	}
+	// Min(A) = Min(Aᵀ)
+	aU, _ := untranspose(a)
+	switch m := aU.(type) {
+	case RawMatrixer:
+		rm := m.RawMatrix()
+		min := math.Inf(1)
+		for i := 0; i < rm.Rows; i++ {
+			for _, v := range rm.Data[i*rm.Stride : i*rm.Stride+rm.Cols] {
+				if v < min {
+					min = v
+				}
+			}
+		}
+		return min
+	case RawTriangular:
+		rm := m.RawTriangular()
+		// The min of a triangular is at most 0 unless the size is 1.
+		if rm.N == 1 {
+			return rm.Data[0]
+		}
+		min := 0.0
+		if rm.Uplo == blas.Upper {
+			for i := 0; i < rm.N; i++ {
+				for _, v := range rm.Data[i*rm.Stride+i : i*rm.Stride+rm.N] {
+					if v < min {
+						min = v
+					}
+				}
+			}
+			return min
+		}
+		for i := 0; i < rm.N; i++ {
+			for _, v := range rm.Data[i*rm.Stride : i*rm.Stride+i+1] {
+				if v < min {
+					min = v
+				}
+			}
+		}
+		return min
+	case RawSymmetricer:
+		rm := m.RawSymmetric()
+		if rm.Uplo != blas.Upper {
+			panic(badSymTriangle)
+		}
+		min := math.Inf(1)
+		for i := 0; i < rm.N; i++ {
+			for _, v := range rm.Data[i*rm.Stride+i : i*rm.Stride+rm.N] {
+				if v < min {
+					min = v
+				}
+			}
+		}
+		return min
+	default:
+		r, c := aU.Dims()
+		min := math.Inf(1)
+		for i := 0; i < r; i++ {
+			for j := 0; j < c; j++ {
+				v := aU.At(i, j)
+				if v < min {
+					min = v
+				}
+			}
+		}
+		return min
+	}
+}
+
+// A Normer can compute a norm of the matrix. Valid norms are:
+//
+// 1 - The maximum absolute column sum
+// 2 - The Frobenius norm, the square root of the sum of the squares of the elements
+// Inf - The maximum absolute row sum
+type Normer interface {
+	Norm(norm float64) float64
+}
+
+// Norm returns the specified norm of the matrix A. Valid norms are:
+//
+// 1 - The maximum absolute column sum
+// 2 - The Frobenius norm, the square root of the sum of the squares of the elements
+// Inf - The maximum absolute row sum
+//
+// If a is a Normer, its Norm method will be used to calculate the norm.
+//
+// Norm will panic with ErrNormOrder if an illegal norm is specified and with
+// ErrZeroLength if the matrix has zero size.
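A quick sketch of the three norms on a concrete matrix (illustrative only, not part of the vendored file):

	package main

	import (
		"fmt"
		"math"

		"gonum.org/v1/gonum/mat"
	)

	func main() {
		a := mat.NewDense(2, 2, []float64{1, -2, -3, 4})

		fmt.Println(mat.Norm(a, 1))           // max column sum: |-2|+|4| = 6
		fmt.Println(mat.Norm(a, 2))           // Frobenius: sqrt(1+4+9+16) ≈ 5.477
		fmt.Println(mat.Norm(a, math.Inf(1))) // max row sum: |-3|+|4| = 7
	}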
+func Norm(a Matrix, norm float64) float64 { + r, c := a.Dims() + if r == 0 || c == 0 { + panic(ErrZeroLength) + } + m, trans := untransposeExtract(a) + if m, ok := m.(Normer); ok { + if trans { + switch norm { + case 1: + norm = math.Inf(1) + case math.Inf(1): + norm = 1 + } + } + return m.Norm(norm) + } + switch norm { + default: + panic(ErrNormOrder) + case 1: + var max float64 + for j := 0; j < c; j++ { + var sum float64 + for i := 0; i < r; i++ { + sum += math.Abs(a.At(i, j)) + } + if sum > max { + max = sum + } + } + return max + case 2: + var sum float64 + for i := 0; i < r; i++ { + for j := 0; j < c; j++ { + v := a.At(i, j) + sum += v * v + } + } + return math.Sqrt(sum) + case math.Inf(1): + var max float64 + for i := 0; i < r; i++ { + var sum float64 + for j := 0; j < c; j++ { + sum += math.Abs(a.At(i, j)) + } + if sum > max { + max = sum + } + } + return max + } +} + +// normLapack converts the float64 norm input in Norm to a lapack.MatrixNorm. +func normLapack(norm float64, aTrans bool) lapack.MatrixNorm { + switch norm { + case 1: + n := lapack.MaxColumnSum + if aTrans { + n = lapack.MaxRowSum + } + return n + case 2: + return lapack.Frobenius + case math.Inf(1): + n := lapack.MaxRowSum + if aTrans { + n = lapack.MaxColumnSum + } + return n + default: + panic(ErrNormOrder) + } +} + +// Sum returns the sum of the elements of the matrix. +// +// Sum will panic with ErrZeroLength if the matrix has zero size. +func Sum(a Matrix) float64 { + r, c := a.Dims() + if r == 0 || c == 0 { + panic(ErrZeroLength) + } + var sum float64 + aU, _ := untranspose(a) + switch rma := aU.(type) { + case RawSymmetricer: + rm := rma.RawSymmetric() + for i := 0; i < rm.N; i++ { + // Diagonals count once while off-diagonals count twice. + sum += rm.Data[i*rm.Stride+i] + var s float64 + for _, v := range rm.Data[i*rm.Stride+i+1 : i*rm.Stride+rm.N] { + s += v + } + sum += 2 * s + } + return sum + case RawTriangular: + rm := rma.RawTriangular() + var startIdx, endIdx int + for i := 0; i < rm.N; i++ { + // Start and end index for this triangle-row. + switch rm.Uplo { + case blas.Upper: + startIdx = i + endIdx = rm.N + case blas.Lower: + startIdx = 0 + endIdx = i + 1 + default: + panic(badTriangle) + } + for _, v := range rm.Data[i*rm.Stride+startIdx : i*rm.Stride+endIdx] { + sum += v + } + } + return sum + case RawMatrixer: + rm := rma.RawMatrix() + for i := 0; i < rm.Rows; i++ { + for _, v := range rm.Data[i*rm.Stride : i*rm.Stride+rm.Cols] { + sum += v + } + } + return sum + case *VecDense: + rm := rma.RawVector() + for i := 0; i < rm.N; i++ { + sum += rm.Data[i*rm.Inc] + } + return sum + default: + r, c := a.Dims() + for i := 0; i < r; i++ { + for j := 0; j < c; j++ { + sum += a.At(i, j) + } + } + return sum + } +} + +// A Tracer can compute the trace of the matrix. Trace must panic with ErrSquare +// if the matrix is not square. +type Tracer interface { + Trace() float64 +} + +// Trace returns the trace of the matrix. If a is a Tracer, its Trace method +// will be used to calculate the matrix trace. +// +// Trace will panic with ErrSquare if the matrix is not square and with +// ErrZeroLength if the matrix has zero size. 
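And similarly for the element aggregations defined above, a minimal sketch (not vendored code):

	package main

	import (
		"fmt"

		"gonum.org/v1/gonum/mat"
	)

	func main() {
		a := mat.NewDense(2, 2, []float64{1, 2, 3, 4})

		fmt.Println(mat.Sum(a))   // 10: every element
		fmt.Println(mat.Trace(a)) // 5: diagonal only, 1 + 4
	}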
+func Trace(a Matrix) float64 { + r, c := a.Dims() + if r == 0 || c == 0 { + panic(ErrZeroLength) + } + m, _ := untransposeExtract(a) + if t, ok := m.(Tracer); ok { + return t.Trace() + } + if r != c { + panic(ErrSquare) + } + var v float64 + for i := 0; i < r; i++ { + v += a.At(i, i) + } + return v +} + +// use returns a float64 slice with l elements, using f if it +// has the necessary capacity, otherwise creating a new slice. +func use(f []float64, l int) []float64 { + if l <= cap(f) { + return f[:l] + } + return make([]float64, l) +} + +// useZeroed returns a float64 slice with l elements, using f if it +// has the necessary capacity, otherwise creating a new slice. The +// elements of the returned slice are guaranteed to be zero. +func useZeroed(f []float64, l int) []float64 { + if l <= cap(f) { + f = f[:l] + zero(f) + return f + } + return make([]float64, l) +} + +// zero zeros the given slice's elements. +func zero(f []float64) { + for i := range f { + f[i] = 0 + } +} + +// useInt returns an int slice with l elements, using i if it +// has the necessary capacity, otherwise creating a new slice. +func useInt(i []int, l int) []int { + if l <= cap(i) { + return i[:l] + } + return make([]int, l) +} diff --git a/vendor/gonum.org/v1/gonum/mat/offset.go b/vendor/gonum.org/v1/gonum/mat/offset.go new file mode 100644 index 0000000000..26c80a4c8f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/offset.go @@ -0,0 +1,32 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !safe +// +build !safe + +package mat + +import "unsafe" + +// offset returns the number of float64 values b[0] is after a[0]. +func offset(a, b []float64) int { + if &a[0] == &b[0] { + return 0 + } + // This expression must be atomic with respect to GC moves. + // At this stage this is true, because the GC does not + // move. See https://golang.org/issue/12445. + return int(uintptr(unsafe.Pointer(&b[0]))-uintptr(unsafe.Pointer(&a[0]))) / int(unsafe.Sizeof(float64(0))) +} + +// offsetComplex returns the number of complex128 values b[0] is after a[0]. +func offsetComplex(a, b []complex128) int { + if &a[0] == &b[0] { + return 0 + } + // This expression must be atomic with respect to GC moves. + // At this stage this is true, because the GC does not + // move. See https://golang.org/issue/12445. + return int(uintptr(unsafe.Pointer(&b[0]))-uintptr(unsafe.Pointer(&a[0]))) / int(unsafe.Sizeof(complex128(0))) +} diff --git a/vendor/gonum.org/v1/gonum/mat/offset_appengine.go b/vendor/gonum.org/v1/gonum/mat/offset_appengine.go new file mode 100644 index 0000000000..be2ca78cba --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/offset_appengine.go @@ -0,0 +1,40 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build safe +// +build safe + +package mat + +import "reflect" + +var sizeOfFloat64 = int(reflect.TypeOf(float64(0)).Size()) + +// offset returns the number of float64 values b[0] is after a[0]. +func offset(a, b []float64) int { + va0 := reflect.ValueOf(a).Index(0) + vb0 := reflect.ValueOf(b).Index(0) + if va0.Addr() == vb0.Addr() { + return 0 + } + // This expression must be atomic with respect to GC moves. + // At this stage this is true, because the GC does not + // move. See https://golang.org/issue/12445. 
+ return int(vb0.UnsafeAddr()-va0.UnsafeAddr()) / sizeOfFloat64 +} + +var sizeOfComplex128 = int(reflect.TypeOf(complex128(0)).Size()) + +// offsetComplex returns the number of complex128 values b[0] is after a[0]. +func offsetComplex(a, b []complex128) int { + va0 := reflect.ValueOf(a).Index(0) + vb0 := reflect.ValueOf(b).Index(0) + if va0.Addr() == vb0.Addr() { + return 0 + } + // This expression must be atomic with respect to GC moves. + // At this stage this is true, because the GC does not + // move. See https://golang.org/issue/12445. + return int(vb0.UnsafeAddr()-va0.UnsafeAddr()) / sizeOfComplex128 +} diff --git a/vendor/gonum.org/v1/gonum/mat/pool.go b/vendor/gonum.org/v1/gonum/mat/pool.go new file mode 100644 index 0000000000..b9dce1c45b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/pool.go @@ -0,0 +1,260 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math/bits" + "sync" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/blas/cblas128" +) + +// poolFor returns the ceiling of base 2 log of size. It provides an index +// into a pool array to a sync.Pool that will return values able to hold +// size elements. +func poolFor(size uint) int { + if size == 0 { + return 0 + } + return bits.Len(size - 1) +} + +var ( + // poolDense contains size stratified workspace Dense pools. + // Each poolDense element i returns sized matrices with a data + // slice capped at 1< 2. + if !m.IsEmpty() { + if fr != r { + panic(ErrShape) + } + if _, lc := factors[len(factors)-1].Dims(); lc != c { + panic(ErrShape) + } + } + + dims := make([]int, len(factors)+1) + dims[0] = r + dims[len(dims)-1] = c + pc := fc + for i, f := range factors[1:] { + cr, cc := f.Dims() + dims[i+1] = cr + if pc != cr { + panic(ErrShape) + } + pc = cc + } + + return &multiplier{ + factors: factors, + dims: dims, + table: newTable(len(factors)), + } +} + +// optimize determines an optimal matrix multiply operation order. +func (p *multiplier) optimize() { + if debugProductWalk { + fmt.Printf("chain dims: %v\n", p.dims) + } + const maxInt = int(^uint(0) >> 1) + for f := 1; f < len(p.factors); f++ { + for i := 0; i < len(p.factors)-f; i++ { + j := i + f + p.table.set(i, j, entry{cost: maxInt}) + for k := i; k < j; k++ { + cost := p.table.at(i, k).cost + p.table.at(k+1, j).cost + p.dims[i]*p.dims[k+1]*p.dims[j+1] + if cost < p.table.at(i, j).cost { + p.table.set(i, j, entry{cost: cost, k: k}) + } + } + } + } +} + +// multiply walks the optimal operation tree found by optimize, +// leaving the final result in the stack. It returns the +// product, which may be copied but should be returned to +// the workspace pool. +func (p *multiplier) multiply() *Dense { + result, _ := p.multiplySubchain(0, len(p.factors)-1) + if debugProductWalk { + r, c := result.Dims() + fmt.Printf("\tpop result (%d×%d) cost=%d\n", r, c, p.table.at(0, len(p.factors)-1).cost) + } + return result.(*Dense) +} + +func (p *multiplier) multiplySubchain(i, j int) (m Matrix, intermediate bool) { + if i == j { + return p.factors[i], false + } + + a, aTmp := p.multiplySubchain(i, p.table.at(i, j).k) + b, bTmp := p.multiplySubchain(p.table.at(i, j).k+1, j) + + ar, ac := a.Dims() + br, bc := b.Dims() + if ac != br { + // Panic with a string since this + // is not a user-facing panic. 
+		panic(ErrShape.Error())
+	}
+
+	if debugProductWalk {
+		fmt.Printf("\tpush f[%d] (%d×%d)%s * f[%d] (%d×%d)%s\n",
+			i, ar, ac, result(aTmp), j, br, bc, result(bTmp))
+	}
+
+	r := getDenseWorkspace(ar, bc, false)
+	r.Mul(a, b)
+	if aTmp {
+		putDenseWorkspace(a.(*Dense))
+	}
+	if bTmp {
+		putDenseWorkspace(b.(*Dense))
+	}
+	return r, true
+}
+
+type entry struct {
+	k    int // k is the chain subdivision index.
+	cost int // cost is the cost of the operation.
+}
+
+// table is a row major n×n dynamic programming table.
+type table struct {
+	n       int
+	entries []entry
+}
+
+func newTable(n int) table {
+	return table{n: n, entries: make([]entry, n*n)}
+}
+
+func (t table) at(i, j int) entry     { return t.entries[i*t.n+j] }
+func (t table) set(i, j int, e entry) { t.entries[i*t.n+j] = e }
+
+type result bool
+
+func (r result) String() string {
+	if r {
+		return " (popped result)"
+	}
+	return ""
+}
diff --git a/vendor/gonum.org/v1/gonum/mat/qr.go b/vendor/gonum.org/v1/gonum/mat/qr.go
new file mode 100644
index 0000000000..7f8fec8f6f
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mat/qr.go
@@ -0,0 +1,349 @@
+// Copyright ©2013 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mat
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+	"gonum.org/v1/gonum/lapack/lapack64"
+)
+
+const badQR = "mat: invalid QR factorization"
+
+// QR is a type for creating and using the QR factorization of a matrix.
+type QR struct {
+	qr   *Dense
+	q    *Dense
+	tau  []float64
+	cond float64
+}
+
+// Dims returns the dimensions of the matrix.
+func (qr *QR) Dims() (r, c int) {
+	if qr.qr == nil {
+		return 0, 0
+	}
+	return qr.qr.Dims()
+}
+
+// At returns the element at row i, column j. At will panic if the receiver
+// does not contain a successful factorization.
+func (qr *QR) At(i, j int) float64 {
+	if !qr.isValid() {
+		panic(badQR)
+	}
+
+	m, n := qr.Dims()
+	if uint(i) >= uint(m) {
+		panic(ErrRowAccess)
+	}
+	if uint(j) >= uint(n) {
+		panic(ErrColAccess)
+	}
+
+	if qr.q == nil || qr.q.IsEmpty() {
+		// Calculate Qi, Q i-th row
+		qi := getFloat64s(m, true)
+		qr.qRowTo(i, qi)
+
+		// Compute QR(i,j)
+		var val float64
+		for k := 0; k <= j; k++ {
+			val += qi[k] * qr.qr.at(k, j)
+		}
+		putFloat64s(qi)
+		return val
+	}
+
+	var val float64
+	for k := 0; k <= j; k++ {
+		val += qr.q.at(i, k) * qr.qr.at(k, j)
+	}
+	return val
+}
+
+// qRowTo extracts the i-th row of the orthonormal matrix Q from a QR
+// decomposition.
+func (qr *QR) qRowTo(i int, dst []float64) {
+	c := blas64.General{
+		Rows:   1,
+		Cols:   len(dst),
+		Stride: len(dst),
+		Data:   dst,
+	}
+	c.Data[i] = 1 // C is the i-th unit vector
+
+	// Construct Qi from the elementary reflectors: Qi = C * (H(1) H(2) ... H(nTau))
+	work := []float64{0}
+	lapack64.Ormqr(blas.Right, blas.NoTrans, qr.qr.mat, qr.tau, c, work, -1)
+	work = getFloat64s(int(work[0]), false)
+	lapack64.Ormqr(blas.Right, blas.NoTrans, qr.qr.mat, qr.tau, c, work, len(work))
+	putFloat64s(work)
+}
+
+// T performs an implicit transpose by returning the receiver inside a
+// Transpose.
+func (qr *QR) T() Matrix {
+	return Transpose{qr}
+}
+
+func (qr *QR) updateCond(norm lapack.MatrixNorm) {
+	// Since A = Q*R, and Q is orthogonal, we get for the condition number κ
+	// κ(A) := |A| |A^-1| = |Q*R| |(Q*R)^-1| = |R| |R^-1 * Qᵀ|
+	// = |R| |R^-1| = κ(R),
+	// where we used the fact that Q^-1 = Qᵀ. However, this assumes that
+	// the matrix norm is invariant under orthogonal transformations which
+	// is not the case for CondNorm. Hopefully the error is negligible: κ
+	// is only a qualitative measure anyway.
+	n := qr.qr.mat.Cols
+	work := getFloat64s(3*n, false)
+	iwork := getInts(n, false)
+	r := qr.qr.asTriDense(n, blas.NonUnit, blas.Upper)
+	v := lapack64.Trcon(norm, r.mat, work, iwork)
+	putFloat64s(work)
+	putInts(iwork)
+	qr.cond = 1 / v
+}
+
+// Factorize computes the QR factorization of an m×n matrix a where m >= n. The QR
+// factorization always exists even if A is singular.
+//
+// The QR decomposition is a factorization of the matrix A such that A = Q * R.
+// The matrix Q is an orthonormal m×m matrix, and R is an m×n upper triangular matrix.
+// Q and R can be extracted using the QTo and RTo methods.
+func (qr *QR) Factorize(a Matrix) {
+	qr.factorize(a, CondNorm)
+}
+
+func (qr *QR) factorize(a Matrix, norm lapack.MatrixNorm) {
+	m, n := a.Dims()
+	if m < n {
+		panic(ErrShape)
+	}
+	if qr.qr == nil {
+		qr.qr = &Dense{}
+	}
+	qr.qr.CloneFrom(a)
+	work := []float64{0}
+	qr.tau = make([]float64, n)
+	lapack64.Geqrf(qr.qr.mat, qr.tau, work, -1)
+	work = getFloat64s(int(work[0]), false)
+	lapack64.Geqrf(qr.qr.mat, qr.tau, work, len(work))
+	putFloat64s(work)
+	qr.updateCond(norm)
+	if qr.q != nil {
+		qr.q.Reset()
+	}
+}
+
+func (qr *QR) updateQ() {
+	m, _ := qr.Dims()
+	if qr.q == nil {
+		qr.q = NewDense(m, m, nil)
+	} else {
+		qr.q.reuseAsNonZeroed(m, m)
+	}
+	// Construct Q from the elementary reflectors.
+	qr.q.Copy(qr.qr)
+	work := []float64{0}
+	lapack64.Orgqr(qr.q.mat, qr.tau, work, -1)
+	work = getFloat64s(int(work[0]), false)
+	lapack64.Orgqr(qr.q.mat, qr.tau, work, len(work))
+	putFloat64s(work)
+}
+
+// isValid returns whether the receiver contains a factorization.
+func (qr *QR) isValid() bool {
+	return qr.qr != nil && !qr.qr.IsEmpty()
+}
+
+// Cond returns the condition number for the factorized matrix.
+// Cond will panic if the receiver does not contain a factorization.
+func (qr *QR) Cond() float64 {
+	if !qr.isValid() {
+		panic(badQR)
+	}
+	return qr.cond
+}
+
+// TODO(btracey): Add in the "Reduced" forms for extracting the n×n orthogonal
+// and upper triangular matrices.
+
+// RTo extracts the m×n upper trapezoidal matrix from a QR decomposition.
+//
+// If dst is empty, RTo will resize dst to be r×c. When dst is non-empty,
+// RTo will panic if dst is not r×c. RTo will also panic if the receiver
+// does not contain a successful factorization.
+func (qr *QR) RTo(dst *Dense) {
+	if !qr.isValid() {
+		panic(badQR)
+	}
+
+	r, c := qr.qr.Dims()
+	if dst.IsEmpty() {
+		dst.ReuseAs(r, c)
+	} else {
+		r2, c2 := dst.Dims()
+		if r != r2 || c != c2 {
+			panic(ErrShape)
+		}
+	}
+
+	// Disguise the QR as an upper triangular
+	t := &TriDense{
+		mat: blas64.Triangular{
+			N:      c,
+			Stride: qr.qr.mat.Stride,
+			Data:   qr.qr.mat.Data,
+			Uplo:   blas.Upper,
+			Diag:   blas.NonUnit,
+		},
+		cap: qr.qr.capCols,
+	}
+	dst.Copy(t)
+
+	// Zero below the triangular.
+	for i := c; i < r; i++ {
+		zero(dst.mat.Data[i*dst.mat.Stride : i*dst.mat.Stride+c])
+	}
+}
+
+// QTo extracts the r×r orthonormal matrix Q from a QR decomposition.
+//
+// If dst is empty, QTo will resize dst to be r×r. When dst is non-empty,
+// QTo will panic if dst is not r×r. QTo will also panic if the receiver
+// does not contain a successful factorization.
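A round-trip sketch exercising the extraction methods above (illustrative only, not part of the vendored file):

	package main

	import (
		"fmt"

		"gonum.org/v1/gonum/mat"
	)

	func main() {
		a := mat.NewDense(3, 2, []float64{1, 2, 3, 4, 5, 6})

		var qr mat.QR
		qr.Factorize(a)

		var q, r mat.Dense
		qr.QTo(&q) // 3×3 orthonormal Q
		qr.RTo(&r) // 3×2 upper trapezoidal R

		var got mat.Dense
		got.Mul(&q, &r)
		fmt.Println(mat.EqualApprox(a, &got, 1e-12)) // true: A = Q*R
	}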
+func (qr *QR) QTo(dst *Dense) {
+	if !qr.isValid() {
+		panic(badQR)
+	}
+
+	r, _ := qr.qr.Dims()
+	if dst.IsEmpty() {
+		dst.ReuseAs(r, r)
+	} else {
+		r2, c2 := dst.Dims()
+		if r != r2 || r != c2 {
+			panic(ErrShape)
+		}
+	}
+
+	if qr.q == nil || qr.q.IsEmpty() {
+		qr.updateQ()
+	}
+	dst.Copy(qr.q)
+}
+
+// SolveTo finds a minimum-norm solution to a system of linear equations defined
+// by the matrices A and b, where A is an m×n matrix represented in its QR factorized
+// form. If A is singular or near-singular a Condition error is returned.
+// See the documentation for Condition for more information.
+//
+// The minimization problem solved depends on the input parameters.
+//
+// If trans == false, find X such that ||A*X - B||_2 is minimized.
+// If trans == true, find the minimum norm solution of Aᵀ * X = B.
+//
+// The solution matrix, X, is stored in place into dst.
+// SolveTo will panic if the receiver does not contain a factorization.
+func (qr *QR) SolveTo(dst *Dense, trans bool, b Matrix) error {
+	if !qr.isValid() {
+		panic(badQR)
+	}
+
+	r, c := qr.qr.Dims()
+	br, bc := b.Dims()
+
+	// The QR solve algorithm stores the result in-place into the right hand side.
+	// The storage for the answer must be large enough to hold both b and x.
+	// However, this method's receiver must be the size of x. Copy b, and then
+	// copy the result into x at the end.
+	if trans {
+		if c != br {
+			panic(ErrShape)
+		}
+		dst.reuseAsNonZeroed(r, bc)
+	} else {
+		if r != br {
+			panic(ErrShape)
+		}
+		dst.reuseAsNonZeroed(c, bc)
+	}
+	// Do not need to worry about overlap between x and b because w has its own
+	// independent storage.
+	w := getDenseWorkspace(max(r, c), bc, false)
+	w.Copy(b)
+	t := qr.qr.asTriDense(qr.qr.mat.Cols, blas.NonUnit, blas.Upper).mat
+	if trans {
+		ok := lapack64.Trtrs(blas.Trans, t, w.mat)
+		if !ok {
+			return Condition(math.Inf(1))
+		}
+		for i := c; i < r; i++ {
+			zero(w.mat.Data[i*w.mat.Stride : i*w.mat.Stride+bc])
+		}
+		work := []float64{0}
+		lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, w.mat, work, -1)
+		work = getFloat64s(int(work[0]), false)
+		lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, w.mat, work, len(work))
+		putFloat64s(work)
+	} else {
+		work := []float64{0}
+		lapack64.Ormqr(blas.Left, blas.Trans, qr.qr.mat, qr.tau, w.mat, work, -1)
+		work = getFloat64s(int(work[0]), false)
+		lapack64.Ormqr(blas.Left, blas.Trans, qr.qr.mat, qr.tau, w.mat, work, len(work))
+		putFloat64s(work)
+
+		ok := lapack64.Trtrs(blas.NoTrans, t, w.mat)
+		if !ok {
+			return Condition(math.Inf(1))
+		}
+	}
+	// x was set above to be the correct size for the result.
+	dst.Copy(w)
+	putDenseWorkspace(w)
+	if qr.cond > ConditionTolerance {
+		return Condition(qr.cond)
+	}
+	return nil
+}
+
+// SolveVecTo finds a minimum-norm solution to a system of linear equations,
+//
+// Ax = b.
+//
+// See QR.SolveTo for the full documentation.
+// SolveVecTo will panic if the receiver does not contain a factorization.
+func (qr *QR) SolveVecTo(dst *VecDense, trans bool, b Vector) error {
+	if !qr.isValid() {
+		panic(badQR)
+	}
+
+	r, c := qr.qr.Dims()
+	if _, bc := b.Dims(); bc != 1 {
+		panic(ErrShape)
+	}
+
+	// The Solve implementation is non-trivial, so rather than duplicate the code,
+	// instead recast the VecDenses as Dense and call the matrix code.
+ bm := Matrix(b) + if rv, ok := b.(RawVectorer); ok { + bmat := rv.RawVector() + if dst != b { + dst.checkOverlap(bmat) + } + b := VecDense{mat: bmat} + bm = b.asDense() + } + if trans { + dst.reuseAsNonZeroed(r) + } else { + dst.reuseAsNonZeroed(c) + } + return qr.SolveTo(dst.asDense(), trans, bm) +} diff --git a/vendor/gonum.org/v1/gonum/mat/shadow.go b/vendor/gonum.org/v1/gonum/mat/shadow.go new file mode 100644 index 0000000000..4fc24c3466 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/shadow.go @@ -0,0 +1,243 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import "gonum.org/v1/gonum/blas/blas64" + +// checkOverlap returns false if the receiver does not overlap data elements +// referenced by the parameter and panics otherwise. +// +// checkOverlap methods return a boolean to allow the check call to be added to a +// boolean expression, making use of short-circuit operators. +func checkOverlap(a, b blas64.General) bool { + if cap(a.Data) == 0 || cap(b.Data) == 0 { + return false + } + + off := offset(a.Data[:1], b.Data[:1]) + + if off == 0 { + // At least one element overlaps. + if a.Cols == b.Cols && a.Rows == b.Rows && a.Stride == b.Stride { + panic(regionIdentity) + } + panic(regionOverlap) + } + + if off > 0 && len(a.Data) <= off { + // We know a is completely before b. + return false + } + if off < 0 && len(b.Data) <= -off { + // We know a is completely after b. + return false + } + + if a.Stride != b.Stride && a.Stride != 1 && b.Stride != 1 { + // Too hard, so assume the worst; if either stride + // is one it will be caught in rectanglesOverlap. + panic(mismatchedStrides) + } + + if off < 0 { + off = -off + a.Cols, b.Cols = b.Cols, a.Cols + } + if rectanglesOverlap(off, a.Cols, b.Cols, min(a.Stride, b.Stride)) { + panic(regionOverlap) + } + return false +} + +func (m *Dense) checkOverlap(a blas64.General) bool { + return checkOverlap(m.RawMatrix(), a) +} + +func (m *Dense) checkOverlapMatrix(a Matrix) bool { + if m == a { + return false + } + var amat blas64.General + switch ar := a.(type) { + default: + return false + case RawMatrixer: + amat = ar.RawMatrix() + case RawSymmetricer: + amat = generalFromSymmetric(ar.RawSymmetric()) + case RawSymBander: + amat = generalFromSymmetricBand(ar.RawSymBand()) + case RawTriangular: + amat = generalFromTriangular(ar.RawTriangular()) + case RawVectorer: + r, c := a.Dims() + amat = generalFromVector(ar.RawVector(), r, c) + } + return m.checkOverlap(amat) +} + +func (s *SymDense) checkOverlap(a blas64.General) bool { + return checkOverlap(generalFromSymmetric(s.RawSymmetric()), a) +} + +func (s *SymDense) checkOverlapMatrix(a Matrix) bool { + if s == a { + return false + } + var amat blas64.General + switch ar := a.(type) { + default: + return false + case RawMatrixer: + amat = ar.RawMatrix() + case RawSymmetricer: + amat = generalFromSymmetric(ar.RawSymmetric()) + case RawSymBander: + amat = generalFromSymmetricBand(ar.RawSymBand()) + case RawTriangular: + amat = generalFromTriangular(ar.RawTriangular()) + case RawVectorer: + r, c := a.Dims() + amat = generalFromVector(ar.RawVector(), r, c) + } + return s.checkOverlap(amat) +} + +// generalFromSymmetric returns a blas64.General with the backing +// data and dimensions of a. 
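The generalFrom* helpers below all share one idea: re-describe the same backing slice under a different shape so that the overlap checks can compare raw Data regions. A hedged sketch of that aliasing (not vendored code, arbitrary values):

	package main

	import (
		"fmt"

		"gonum.org/v1/gonum/blas"
		"gonum.org/v1/gonum/blas/blas64"
	)

	func main() {
		s := blas64.Symmetric{
			N:      2,
			Stride: 2,
			Uplo:   blas.Upper,
			Data:   []float64{1, 2, 0, 3},
		}
		// The same backing slice viewed as a general matrix; no copy
		// is made, which is what lets checkOverlap reason about it.
		g := blas64.General{Rows: s.N, Cols: s.N, Stride: s.Stride, Data: s.Data}

		g.Data[1] = 9
		fmt.Println(s.Data[1]) // 9: writes through g are visible through s
	}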
+func generalFromSymmetric(a blas64.Symmetric) blas64.General {
+	return blas64.General{
+		Rows:   a.N,
+		Cols:   a.N,
+		Stride: a.Stride,
+		Data:   a.Data,
+	}
+}
+
+func (t *TriDense) checkOverlap(a blas64.General) bool {
+	return checkOverlap(generalFromTriangular(t.RawTriangular()), a)
+}
+
+func (t *TriDense) checkOverlapMatrix(a Matrix) bool {
+	if t == a {
+		return false
+	}
+	var amat blas64.General
+	switch ar := a.(type) {
+	default:
+		return false
+	case RawMatrixer:
+		amat = ar.RawMatrix()
+	case RawSymmetricer:
+		amat = generalFromSymmetric(ar.RawSymmetric())
+	case RawSymBander:
+		amat = generalFromSymmetricBand(ar.RawSymBand())
+	case RawTriangular:
+		amat = generalFromTriangular(ar.RawTriangular())
+	case RawVectorer:
+		r, c := a.Dims()
+		amat = generalFromVector(ar.RawVector(), r, c)
+	}
+	return t.checkOverlap(amat)
+}
+
+// generalFromTriangular returns a blas64.General with the backing
+// data and dimensions of a.
+func generalFromTriangular(a blas64.Triangular) blas64.General {
+	return blas64.General{
+		Rows:   a.N,
+		Cols:   a.N,
+		Stride: a.Stride,
+		Data:   a.Data,
+	}
+}
+
+func (v *VecDense) checkOverlap(a blas64.Vector) bool {
+	mat := v.mat
+	if cap(mat.Data) == 0 || cap(a.Data) == 0 {
+		return false
+	}
+
+	off := offset(mat.Data[:1], a.Data[:1])
+
+	if off == 0 {
+		// At least one element overlaps.
+		if mat.Inc == a.Inc && len(mat.Data) == len(a.Data) {
+			panic(regionIdentity)
+		}
+		panic(regionOverlap)
+	}
+
+	if off > 0 && len(mat.Data) <= off {
+		// We know v is completely before a.
+		return false
+	}
+	if off < 0 && len(a.Data) <= -off {
+		// We know v is completely after a.
+		return false
+	}
+
+	if mat.Inc != a.Inc && mat.Inc != 1 && a.Inc != 1 {
+		// Too hard, so assume the worst; if either
+		// increment is one it will be caught below.
+		panic(mismatchedStrides)
+	}
+	inc := min(mat.Inc, a.Inc)
+
+	if inc == 1 || off%inc == 0 {
+		panic(regionOverlap)
+	}
+	return false
+}
+
+// generalFromVector returns a blas64.General with the backing
+// data and dimensions of a.
+func generalFromVector(a blas64.Vector, r, c int) blas64.General {
+	return blas64.General{
+		Rows:   r,
+		Cols:   c,
+		Stride: a.Inc,
+		Data:   a.Data,
+	}
+}
+
+func (s *SymBandDense) checkOverlap(a blas64.General) bool {
+	return checkOverlap(generalFromSymmetricBand(s.RawSymBand()), a)
+}
+
+//lint:ignore U1000 This will be used when we do shadow checks for banded matrices.
+func (s *SymBandDense) checkOverlapMatrix(a Matrix) bool {
+	if s == a {
+		return false
+	}
+	var amat blas64.General
+	switch ar := a.(type) {
+	default:
+		return false
+	case RawMatrixer:
+		amat = ar.RawMatrix()
+	case RawSymmetricer:
+		amat = generalFromSymmetric(ar.RawSymmetric())
+	case RawSymBander:
+		amat = generalFromSymmetricBand(ar.RawSymBand())
+	case RawTriangular:
+		amat = generalFromTriangular(ar.RawTriangular())
+	case RawVectorer:
+		r, c := a.Dims()
+		amat = generalFromVector(ar.RawVector(), r, c)
+	}
+	return s.checkOverlap(amat)
+}
+
+// generalFromSymmetricBand returns a blas64.General with the backing
+// data and dimensions of a.
+func generalFromSymmetricBand(a blas64.SymmetricBand) blas64.General {
+	return blas64.General{
+		Rows:   a.N,
+		Cols:   a.K + 1,
+		Data:   a.Data,
+		Stride: a.Stride,
+	}
+}
diff --git a/vendor/gonum.org/v1/gonum/mat/shadow_common.go b/vendor/gonum.org/v1/gonum/mat/shadow_common.go
new file mode 100644
index 0000000000..e4cdf4ddee
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mat/shadow_common.go
@@ -0,0 +1,54 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mat
+
+const (
+	// regionOverlap is the panic string used for the general case
+	// of a matrix region overlap between a source and destination.
+	regionOverlap = "mat: bad region: overlap"
+
+	// regionIdentity is the panic string used for the specific
+	// case of complete agreement between a source and a destination.
+	regionIdentity = "mat: bad region: identical"
+
+	// mismatchedStrides is the panic string used for overlapping
+	// data slices with differing strides.
+	mismatchedStrides = "mat: bad region: different strides"
+)
+
+// rectanglesOverlap returns whether the strided rectangles a and b overlap
+// when b is offset by off elements after a but has at least one element before
+// the end of a. off must be positive. a and b have aCols and bCols columns
+// respectively.
+//
+// rectanglesOverlap works by shifting both matrices left such that the left
+// column of a is at 0. The column indexes are flattened by obtaining the shifted
+// relative left and right column positions modulo the common stride. This allows
+// direct comparison of the column offsets when the matrix backing data slices
+// are known to overlap.
+func rectanglesOverlap(off, aCols, bCols, stride int) bool {
+	if stride == 1 {
+		// Unit stride means overlapping data
+		// slices must overlap as matrices.
+		return true
+	}
+
+	// Flatten the shifted matrix column positions
+	// so a starts at 0, modulo the common stride.
+	aTo := aCols
+	// The mod stride operations here make the from
+	// and to indexes comparable between a and b when
+	// the data slices of a and b overlap.
+	bFrom := off % stride
+	bTo := (bFrom + bCols) % stride
+
+	if bTo == 0 || bFrom < bTo {
+		// b matrix is not wrapped: compare for
+		// simple overlap.
+		return bFrom < aTo
+	}
+
+	// b strictly wraps and so must overlap with a.
+	return true
+}
diff --git a/vendor/gonum.org/v1/gonum/mat/shadow_complex.go b/vendor/gonum.org/v1/gonum/mat/shadow_complex.go
new file mode 100644
index 0000000000..1a3f3fc231
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mat/shadow_complex.go
@@ -0,0 +1,72 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO(kortschak): Generate this file from shadow.go when all complex types are available.
+
+package mat
+
+import "gonum.org/v1/gonum/blas/cblas128"
+
+// checkOverlapComplex returns false if the receiver does not overlap data elements
+// referenced by the parameter and panics otherwise.
+//
+// checkOverlapComplex methods return a boolean to allow the check call to be added to a
+// boolean expression, making use of short-circuit operators.
+func checkOverlapComplex(a, b cblas128.General) bool {
+	if cap(a.Data) == 0 || cap(b.Data) == 0 {
+		return false
+	}
+
+	off := offsetComplex(a.Data[:1], b.Data[:1])
+
+	if off == 0 {
+		// At least one element overlaps.
+		if a.Cols == b.Cols && a.Rows == b.Rows && a.Stride == b.Stride {
+			panic(regionIdentity)
+		}
+		panic(regionOverlap)
+	}
+
+	if off > 0 && len(a.Data) <= off {
+		// We know a is completely before b.
+		return false
+	}
+	if off < 0 && len(b.Data) <= -off {
+		// We know a is completely after b.
+		return false
+	}
+
+	if a.Stride != b.Stride && a.Stride != 1 && b.Stride != 1 {
+		// Too hard, so assume the worst; if either stride
+		// is one it will be caught in rectanglesOverlap.
+ panic(mismatchedStrides) + } + + if off < 0 { + off = -off + a.Cols, b.Cols = b.Cols, a.Cols + } + if rectanglesOverlap(off, a.Cols, b.Cols, min(a.Stride, b.Stride)) { + panic(regionOverlap) + } + return false +} + +func (m *CDense) checkOverlap(a cblas128.General) bool { + return checkOverlapComplex(m.RawCMatrix(), a) +} + +func (m *CDense) checkOverlapMatrix(a CMatrix) bool { + if m == a { + return false + } + var amat cblas128.General + switch ar := a.(type) { + default: + return false + case RawCMatrixer: + amat = ar.RawCMatrix() + } + return m.checkOverlap(amat) +} diff --git a/vendor/gonum.org/v1/gonum/mat/solve.go b/vendor/gonum.org/v1/gonum/mat/solve.go new file mode 100644 index 0000000000..ffccce8c45 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/solve.go @@ -0,0 +1,124 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +// Solve solves the linear least squares problem +// +// minimize over x |b - A*x|_2 +// +// where A is an m×n matrix, b is a given m element vector and x is n element +// solution vector. Solve assumes that A has full rank, that is +// +// rank(A) = min(m,n) +// +// If m >= n, Solve finds the unique least squares solution of an overdetermined +// system. +// +// If m < n, there is an infinite number of solutions that satisfy b-A*x=0. In +// this case Solve finds the unique solution of an underdetermined system that +// minimizes |x|_2. +// +// Several right-hand side vectors b and solution vectors x can be handled in a +// single call. Vectors b are stored in the columns of the m×k matrix B. Vectors +// x will be stored in-place into the n×k receiver. +// +// If the underlying matrix of a is a SolveToer, its SolveTo method is used, +// otherwise a Dense copy of a will be used for the solution. +// +// If A does not have full rank, a Condition error is returned. See the +// documentation for Condition for more information. +func (m *Dense) Solve(a, b Matrix) error { + aU, aTrans := untransposeExtract(a) + if a, ok := aU.(SolveToer); ok { + return a.SolveTo(m, aTrans, b) + } + + ar, ac := a.Dims() + br, bc := b.Dims() + if ar != br { + panic(ErrShape) + } + m.reuseAsNonZeroed(ac, bc) + + switch { + case ar == ac: + if a == b { + // x = I. + if ar == 1 { + m.mat.Data[0] = 1 + return nil + } + for i := 0; i < ar; i++ { + v := m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+ac] + zero(v) + v[i] = 1 + } + return nil + } + var lu LU + lu.Factorize(a) + return lu.SolveTo(m, false, b) + case ar > ac: + var qr QR + qr.Factorize(a) + return qr.SolveTo(m, false, b) + default: + var lq LQ + lq.Factorize(a) + return lq.SolveTo(m, false, b) + } +} + +// SolveVec solves the linear least squares problem +// +// minimize over x |b - A*x|_2 +// +// where A is an m×n matrix, b is a given m element vector and x is n element +// solution vector. Solve assumes that A has full rank, that is +// +// rank(A) = min(m,n) +// +// If m >= n, Solve finds the unique least squares solution of an overdetermined +// system. +// +// If m < n, there is an infinite number of solutions that satisfy b-A*x=0. In +// this case Solve finds the unique solution of an underdetermined system that +// minimizes |x|_2. +// +// The solution vector x will be stored in-place into the receiver. +// +// If A does not have full rank, a Condition error is returned. See the +// documentation for Condition for more information. 
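+//
+// A minimal usage sketch (values illustrative only): the least squares
+// solution of an overdetermined 3×2 system.
+//
+//	a := NewDense(3, 2, []float64{0, 1, 1, 1, 2, 1})
+//	b := NewVecDense(3, []float64{1, 2, 2})
+//	var x VecDense
+//	if err := x.SolveVec(a, b); err != nil {
+//		// A Condition error reports an ill-conditioned system.
+//	}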
+func (v *VecDense) SolveVec(a Matrix, b Vector) error { + if _, bc := b.Dims(); bc != 1 { + panic(ErrShape) + } + _, c := a.Dims() + + // The Solve implementation is non-trivial, so rather than duplicate the code, + // instead recast the VecDenses as Dense and call the matrix code. + + if rv, ok := b.(RawVectorer); ok { + bmat := rv.RawVector() + if v != b { + v.checkOverlap(bmat) + } + v.reuseAsNonZeroed(c) + m := v.asDense() + // We conditionally create bm as m when b and v are identical + // to prevent the overlap detection code from identifying m + // and bm as overlapping but not identical. + bm := m + if v != b { + b := VecDense{mat: bmat} + bm = b.asDense() + } + return m.Solve(a, bm) + } + + v.reuseAsNonZeroed(c) + m := v.asDense() + return m.Solve(a, b) +} diff --git a/vendor/gonum.org/v1/gonum/mat/svd.go b/vendor/gonum.org/v1/gonum/mat/svd.go new file mode 100644 index 0000000000..5244d9f67d --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/svd.go @@ -0,0 +1,425 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" + "gonum.org/v1/gonum/lapack/lapack64" +) + +const badRcond = "mat: invalid rcond value" + +// SVD is a type for creating and using the Singular Value Decomposition +// of a matrix. +type SVD struct { + kind SVDKind + + s []float64 + u blas64.General + vt blas64.General +} + +// SVDKind specifies the treatment of singular vectors during an SVD +// factorization. +type SVDKind int + +const ( + // SVDNone specifies that no singular vectors should be computed during + // the decomposition. + SVDNone SVDKind = 0 + + // SVDThinU specifies the thin decomposition for U should be computed. + SVDThinU SVDKind = 1 << (iota - 1) + // SVDFullU specifies the full decomposition for U should be computed. + SVDFullU + // SVDThinV specifies the thin decomposition for V should be computed. + SVDThinV + // SVDFullV specifies the full decomposition for V should be computed. + SVDFullV + + // SVDThin is a convenience value for computing both thin vectors. + SVDThin SVDKind = SVDThinU | SVDThinV + // SVDFull is a convenience value for computing both full vectors. + SVDFull SVDKind = SVDFullU | SVDFullV +) + +// succFact returns whether the receiver contains a successful factorization. +func (svd *SVD) succFact() bool { + return len(svd.s) != 0 +} + +// Factorize computes the singular value decomposition (SVD) of the input matrix A. +// The singular values of A are computed in all cases, while the singular +// vectors are optionally computed depending on the input kind. +// +// The full singular value decomposition (kind == SVDFull) is a factorization +// of an m×n matrix A of the form +// +// A = U * Σ * Vᵀ +// +// where Σ is an m×n diagonal matrix, U is an m×m orthogonal matrix, and V is an +// n×n orthogonal matrix. The diagonal elements of Σ are the singular values of A. +// The first min(m,n) columns of U and V are, respectively, the left and right +// singular vectors of A. +// +// Significant storage space can be saved by using the thin representation of +// the SVD (kind == SVDThin) instead of the full SVD, especially if +// m >> n or m << n. The thin SVD finds +// +// A = U~ * Σ * V~ᵀ +// +// where U~ is of size m×min(m,n), Σ is a diagonal matrix of size min(m,n)×min(m,n) +// and V~ is of size n×min(m,n). +// +// Factorize returns whether the decomposition succeeded. 
If the decomposition +// failed, routines that require a successful factorization will panic. +func (svd *SVD) Factorize(a Matrix, kind SVDKind) (ok bool) { + // kill previous factorization + svd.s = svd.s[:0] + svd.kind = kind + + m, n := a.Dims() + var jobU, jobVT lapack.SVDJob + + // TODO(btracey): This code should be modified to have the smaller + // matrix written in-place into aCopy when the lapack/native/dgesvd + // implementation is complete. + switch { + case kind&SVDFullU != 0: + jobU = lapack.SVDAll + svd.u = blas64.General{ + Rows: m, + Cols: m, + Stride: m, + Data: use(svd.u.Data, m*m), + } + case kind&SVDThinU != 0: + jobU = lapack.SVDStore + svd.u = blas64.General{ + Rows: m, + Cols: min(m, n), + Stride: min(m, n), + Data: use(svd.u.Data, m*min(m, n)), + } + default: + jobU = lapack.SVDNone + } + switch { + case kind&SVDFullV != 0: + svd.vt = blas64.General{ + Rows: n, + Cols: n, + Stride: n, + Data: use(svd.vt.Data, n*n), + } + jobVT = lapack.SVDAll + case kind&SVDThinV != 0: + svd.vt = blas64.General{ + Rows: min(m, n), + Cols: n, + Stride: n, + Data: use(svd.vt.Data, min(m, n)*n), + } + jobVT = lapack.SVDStore + default: + jobVT = lapack.SVDNone + } + + // A is destroyed on call, so copy the matrix. + aCopy := DenseCopyOf(a) + svd.kind = kind + svd.s = use(svd.s, min(m, n)) + + work := []float64{0} + lapack64.Gesvd(jobU, jobVT, aCopy.mat, svd.u, svd.vt, svd.s, work, -1) + work = getFloat64s(int(work[0]), false) + ok = lapack64.Gesvd(jobU, jobVT, aCopy.mat, svd.u, svd.vt, svd.s, work, len(work)) + putFloat64s(work) + if !ok { + svd.kind = 0 + } + return ok +} + +// Kind returns the SVDKind of the decomposition. If no decomposition has been +// computed, Kind returns -1. +func (svd *SVD) Kind() SVDKind { + if !svd.succFact() { + return -1 + } + return svd.kind +} + +// Rank returns the rank of A based on the count of singular values greater than +// rcond scaled by the largest singular value. +// Rank will panic if the receiver does not contain a successful factorization or +// rcond is negative. +func (svd *SVD) Rank(rcond float64) int { + if rcond < 0 { + panic(badRcond) + } + if !svd.succFact() { + panic(badFact) + } + s0 := svd.s[0] + for i, v := range svd.s { + if v <= rcond*s0 { + return i + } + } + return len(svd.s) +} + +// Cond returns the 2-norm condition number for the factorized matrix. Cond will +// panic if the receiver does not contain a successful factorization. +func (svd *SVD) Cond() float64 { + if !svd.succFact() { + panic(badFact) + } + return svd.s[0] / svd.s[len(svd.s)-1] +} + +// Values returns the singular values of the factorized matrix in descending order. +// +// If the input slice is non-nil, the values will be stored in-place into +// the slice. In this case, the slice must have length min(m,n), and Values will +// panic with ErrSliceLengthMismatch otherwise. If the input slice is nil, a new +// slice of the appropriate length will be allocated and returned. +// +// Values will panic if the receiver does not contain a successful factorization. +func (svd *SVD) Values(s []float64) []float64 { + if !svd.succFact() { + panic(badFact) + } + if s == nil { + s = make([]float64, len(svd.s)) + } + if len(s) != len(svd.s) { + panic(ErrSliceLengthMismatch) + } + copy(s, svd.s) + return s +} + +// UTo extracts the matrix U from the singular value decomposition. The first +// min(m,n) columns are the left singular vectors and correspond to the singular +// values as returned from SVD.Values. 
+// +// If dst is empty, UTo will resize dst to be m×m if the full U was computed +// and size m×min(m,n) if the thin U was computed. When dst is non-empty, then +// UTo will panic if dst is not the appropriate size. UTo will also panic if +// the receiver does not contain a successful factorization, or if U was +// not computed during factorization. +func (svd *SVD) UTo(dst *Dense) { + if !svd.succFact() { + panic(badFact) + } + kind := svd.kind + if kind&SVDThinU == 0 && kind&SVDFullU == 0 { + panic("svd: u not computed during factorization") + } + r := svd.u.Rows + c := svd.u.Cols + if dst.IsEmpty() { + dst.ReuseAs(r, c) + } else { + r2, c2 := dst.Dims() + if r != r2 || c != c2 { + panic(ErrShape) + } + } + + tmp := &Dense{ + mat: svd.u, + capRows: r, + capCols: c, + } + dst.Copy(tmp) +} + +// VTo extracts the matrix V from the singular value decomposition. The first +// min(m,n) columns are the right singular vectors and correspond to the singular +// values as returned from SVD.Values. +// +// If dst is empty, VTo will resize dst to be n×n if the full V was computed +// and size n×min(m,n) if the thin V was computed. When dst is non-empty, then +// VTo will panic if dst is not the appropriate size. VTo will also panic if +// the receiver does not contain a successful factorization, or if V was +// not computed during factorization. +func (svd *SVD) VTo(dst *Dense) { + if !svd.succFact() { + panic(badFact) + } + kind := svd.kind + if kind&SVDThinV == 0 && kind&SVDFullV == 0 { + panic("svd: v not computed during factorization") + } + r := svd.vt.Rows + c := svd.vt.Cols + if dst.IsEmpty() { + dst.ReuseAs(c, r) + } else { + r2, c2 := dst.Dims() + if c != r2 || r != c2 { + panic(ErrShape) + } + } + + tmp := &Dense{ + mat: svd.vt, + capRows: r, + capCols: c, + } + dst.Copy(tmp.T()) +} + +// SolveTo calculates the minimum-norm solution to a linear least squares problem +// +// minimize over n-element vectors x: |b - A*x|_2 and |x|_2 +// +// where b is a given m-element vector, using the SVD of m×n matrix A stored in +// the receiver. A may be rank-deficient, that is, the given effective rank can be +// +// rank ≤ min(m,n) +// +// The rank can be computed using SVD.Rank. +// +// Several right-hand side vectors b and solution vectors x can be handled in a +// single call. Vectors b are stored in the columns of the m×k matrix B and the +// resulting vectors x will be stored in the columns of dst. dst must be either +// empty or have the size equal to n×k. +// +// The decomposition must have been factorized computing both the U and V +// singular vectors. +// +// SolveTo returns the residuals calculated from the complete SVD. For this +// value to be valid the factorization must have been performed with at least +// SVDFullU. 
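+//
+// A usage sketch, assuming a and b are suitably sized matrices defined
+// elsewhere; SVDFullU is requested so that the returned residuals are valid:
+//
+//	var svd SVD
+//	if !svd.Factorize(a, SVDFullU|SVDThinV) {
+//		// handle the failed factorization
+//	}
+//	var x Dense
+//	res := svd.SolveTo(&x, b, svd.Rank(1e-15))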
+func (svd *SVD) SolveTo(dst *Dense, b Matrix, rank int) []float64 { + if !svd.succFact() { + panic(badFact) + } + if rank < 1 || len(svd.s) < rank { + panic("svd: rank out of range") + } + kind := svd.kind + if kind&SVDThinU == 0 && kind&SVDFullU == 0 { + panic("svd: u not computed during factorization") + } + if kind&SVDThinV == 0 && kind&SVDFullV == 0 { + panic("svd: v not computed during factorization") + } + + u := Dense{ + mat: svd.u, + capRows: svd.u.Rows, + capCols: svd.u.Cols, + } + vt := Dense{ + mat: svd.vt, + capRows: svd.vt.Rows, + capCols: svd.vt.Cols, + } + s := svd.s[:rank] + + _, bc := b.Dims() + c := getDenseWorkspace(svd.u.Cols, bc, false) + defer putDenseWorkspace(c) + c.Mul(u.T(), b) + + y := getDenseWorkspace(rank, bc, false) + defer putDenseWorkspace(y) + y.DivElem(c.slice(0, rank, 0, bc), repVector{vec: s, cols: bc}) + dst.Mul(vt.slice(0, rank, 0, svd.vt.Cols).T(), y) + + res := make([]float64, bc) + if rank < svd.u.Cols { + c = c.slice(len(s), svd.u.Cols, 0, bc) + for j := range res { + col := c.ColView(j) + res[j] = Dot(col, col) + } + } + return res +} + +type repVector struct { + vec []float64 + cols int +} + +func (m repVector) Dims() (r, c int) { return len(m.vec), m.cols } +func (m repVector) At(i, j int) float64 { + if i < 0 || len(m.vec) <= i || j < 0 || m.cols <= j { + panic(ErrIndexOutOfRange.string) // Panic with string to prevent mat.Error recovery. + } + return m.vec[i] +} +func (m repVector) T() Matrix { return Transpose{m} } + +// SolveVecTo calculates the minimum-norm solution to a linear least squares problem +// +// minimize over n-element vectors x: |b - A*x|_2 and |x|_2 +// +// where b is a given m-element vector, using the SVD of m×n matrix A stored in +// the receiver. A may be rank-deficient, that is, the given effective rank can be +// +// rank ≤ min(m,n) +// +// The rank can be computed using SVD.Rank. +// +// The resulting vector x will be stored in dst. dst must be either empty or +// have length equal to n. +// +// The decomposition must have been factorized computing both the U and V +// singular vectors. +// +// SolveVecTo returns the residuals calculated from the complete SVD. For this +// value to be valid the factorization must have been performed with at least +// SVDFullU. +func (svd *SVD) SolveVecTo(dst *VecDense, b Vector, rank int) float64 { + if !svd.succFact() { + panic(badFact) + } + if rank < 1 || len(svd.s) < rank { + panic("svd: rank out of range") + } + kind := svd.kind + if kind&SVDThinU == 0 && kind&SVDFullU == 0 { + panic("svd: u not computed during factorization") + } + if kind&SVDThinV == 0 && kind&SVDFullV == 0 { + panic("svd: v not computed during factorization") + } + + u := Dense{ + mat: svd.u, + capRows: svd.u.Rows, + capCols: svd.u.Cols, + } + vt := Dense{ + mat: svd.vt, + capRows: svd.vt.Rows, + capCols: svd.vt.Cols, + } + s := svd.s[:rank] + + c := getVecDenseWorkspace(svd.u.Cols, false) + defer putVecDenseWorkspace(c) + c.MulVec(u.T(), b) + + y := getVecDenseWorkspace(rank, false) + defer putVecDenseWorkspace(y) + y.DivElemVec(c.sliceVec(0, rank), NewVecDense(rank, s)) + dst.MulVec(vt.slice(0, rank, 0, svd.vt.Cols).T(), y) + + var res float64 + if rank < c.Len() { + c = c.sliceVec(rank, c.Len()) + res = Dot(c, c) + } + return res +} diff --git a/vendor/gonum.org/v1/gonum/mat/symband.go b/vendor/gonum.org/v1/gonum/mat/symband.go new file mode 100644 index 0000000000..63638ea912 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/symband.go @@ -0,0 +1,312 @@ +// Copyright ©2017 The Gonum Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" + "gonum.org/v1/gonum/lapack/lapack64" +) + +var ( + symBandDense *SymBandDense + _ Matrix = symBandDense + _ allMatrix = symBandDense + _ denseMatrix = symBandDense + _ Symmetric = symBandDense + _ Banded = symBandDense + _ SymBanded = symBandDense + _ RawSymBander = symBandDense + _ MutableSymBanded = symBandDense + + _ NonZeroDoer = symBandDense + _ RowNonZeroDoer = symBandDense + _ ColNonZeroDoer = symBandDense +) + +// SymBandDense represents a symmetric band matrix in dense storage format. +type SymBandDense struct { + mat blas64.SymmetricBand +} + +// SymBanded is a symmetric band matrix interface type. +type SymBanded interface { + Banded + + // SymmetricDim returns the number of rows/columns in the matrix. + SymmetricDim() int + + // SymBand returns the number of rows/columns in the matrix, and the size of + // the bandwidth. + SymBand() (n, k int) +} + +// MutableSymBanded is a symmetric band matrix interface type that allows elements +// to be altered. +type MutableSymBanded interface { + SymBanded + SetSymBand(i, j int, v float64) +} + +// A RawSymBander can return a blas64.SymmetricBand representation of the receiver. +// Changes to the blas64.SymmetricBand.Data slice will be reflected in the original +// matrix, changes to the N, K, Stride and Uplo fields will not. +type RawSymBander interface { + RawSymBand() blas64.SymmetricBand +} + +// NewSymBandDense creates a new SymBand matrix with n rows and columns. If data == nil, +// a new slice is allocated for the backing slice. If len(data) == n*(k+1), +// data is used as the backing slice, and changes to the elements of the returned +// SymBandDense will be reflected in data. If neither of these is true, NewSymBandDense +// will panic. k must be at least zero and less than n, otherwise NewSymBandDense will panic. +// +// The data must be arranged in row-major order constructed by removing the zeros +// from the rows outside the band and aligning the diagonals. SymBandDense matrices +// are stored in the upper triangle. For example, the matrix +// +// 1 2 3 0 0 0 +// 2 4 5 6 0 0 +// 3 5 7 8 9 0 +// 0 6 8 10 11 12 +// 0 0 9 11 13 14 +// 0 0 0 12 14 15 +// +// becomes (* entries are never accessed) +// +// 1 2 3 +// 4 5 6 +// 7 8 9 +// 10 11 12 +// 13 14 * +// 15 * * +// +// which is passed to NewSymBandDense as []float64{1, 2, ..., 15, *, *, *} with k=2. +// Only the values in the band portion of the matrix are used. +func NewSymBandDense(n, k int, data []float64) *SymBandDense { + if n <= 0 || k < 0 { + if n == 0 { + panic(ErrZeroLength) + } + panic("mat: negative dimension") + } + if k+1 > n { + panic("mat: band out of range") + } + bc := k + 1 + if data != nil && len(data) != n*bc { + panic(ErrShape) + } + if data == nil { + data = make([]float64, n*bc) + } + return &SymBandDense{ + mat: blas64.SymmetricBand{ + N: n, + K: k, + Stride: bc, + Uplo: blas.Upper, + Data: data, + }, + } +} + +// Dims returns the number of rows and columns in the matrix. +func (s *SymBandDense) Dims() (r, c int) { + return s.mat.N, s.mat.N +} + +// SymmetricDim returns the size of the receiver. +func (s *SymBandDense) SymmetricDim() int { + return s.mat.N +} + +// Bandwidth returns the bandwidths of the matrix. 
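+//
+// For a symmetric band matrix both bandwidths equal the halfwidth k. A
+// sketch using the 6×6, k=2 layout shown above (data is assumed to hold
+// the n*(k+1) banded elements):
+//
+//	s := NewSymBandDense(6, 2, data)
+//	kl, ku := s.Bandwidth() // kl == 2, ku == 2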
+func (s *SymBandDense) Bandwidth() (kl, ku int) { + return s.mat.K, s.mat.K +} + +// SymBand returns the number of rows/columns in the matrix, and the size of +// the bandwidth. +func (s *SymBandDense) SymBand() (n, k int) { + return s.mat.N, s.mat.K +} + +// T implements the Matrix interface. Symmetric matrices, by definition, are +// equal to their transpose, and this is a no-op. +func (s *SymBandDense) T() Matrix { + return s +} + +// TBand implements the Banded interface. +func (s *SymBandDense) TBand() Banded { + return s +} + +// RawSymBand returns the underlying blas64.SymBand used by the receiver. +// Changes to elements in the receiver following the call will be reflected +// in returned blas64.SymBand. +func (s *SymBandDense) RawSymBand() blas64.SymmetricBand { + return s.mat +} + +// SetRawSymBand sets the underlying blas64.SymmetricBand used by the receiver. +// Changes to elements in the receiver following the call will be reflected +// in the input. +// +// The supplied SymmetricBand must use blas.Upper storage format. +func (s *SymBandDense) SetRawSymBand(mat blas64.SymmetricBand) { + if mat.Uplo != blas.Upper { + panic("mat: blas64.SymmetricBand does not have blas.Upper storage") + } + s.mat = mat +} + +// IsEmpty returns whether the receiver is empty. Empty matrices can be the +// receiver for size-restricted operations. The receiver can be emptied using +// Reset. +func (s *SymBandDense) IsEmpty() bool { + return s.mat.Stride == 0 +} + +// Reset empties the matrix so that it can be reused as the +// receiver of a dimensionally restricted operation. +// +// Reset should not be used when the matrix shares backing data. +// See the Reseter interface for more information. +func (s *SymBandDense) Reset() { + s.mat.N = 0 + s.mat.K = 0 + s.mat.Stride = 0 + s.mat.Uplo = 0 + s.mat.Data = s.mat.Data[:0] +} + +// Zero sets all of the matrix elements to zero. +func (s *SymBandDense) Zero() { + for i := 0; i < s.mat.N; i++ { + u := min(1+s.mat.K, s.mat.N-i) + zero(s.mat.Data[i*s.mat.Stride : i*s.mat.Stride+u]) + } +} + +// DiagView returns the diagonal as a matrix backed by the original data. +func (s *SymBandDense) DiagView() Diagonal { + n := s.mat.N + return &DiagDense{ + mat: blas64.Vector{ + N: n, + Inc: s.mat.Stride, + Data: s.mat.Data[:(n-1)*s.mat.Stride+1], + }, + } +} + +// DoNonZero calls the function fn for each of the non-zero elements of s. The function fn +// takes a row/column index and the element value of s at (i, j). +func (s *SymBandDense) DoNonZero(fn func(i, j int, v float64)) { + for i := 0; i < s.mat.N; i++ { + for j := max(0, i-s.mat.K); j < min(s.mat.N, i+s.mat.K+1); j++ { + v := s.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } +} + +// DoRowNonZero calls the function fn for each of the non-zero elements of row i of s. The function fn +// takes a row/column index and the element value of s at (i, j). +func (s *SymBandDense) DoRowNonZero(i int, fn func(i, j int, v float64)) { + if i < 0 || s.mat.N <= i { + panic(ErrRowAccess) + } + for j := max(0, i-s.mat.K); j < min(s.mat.N, i+s.mat.K+1); j++ { + v := s.at(i, j) + if v != 0 { + fn(i, j, v) + } + } +} + +// DoColNonZero calls the function fn for each of the non-zero elements of column j of s. The function fn +// takes a row/column index and the element value of s at (i, j). 
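+//
+// A sketch of visiting a single column (the printing is illustrative only):
+//
+//	s.DoColNonZero(3, func(i, j int, v float64) {
+//		fmt.Println(i, j, v) // only in-band, non-zero entries of column 3
+//	})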
+func (s *SymBandDense) DoColNonZero(j int, fn func(i, j int, v float64)) { + if j < 0 || s.mat.N <= j { + panic(ErrColAccess) + } + for i := 0; i < s.mat.N; i++ { + if i-s.mat.K <= j && j < i+s.mat.K+1 { + v := s.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } +} + +// Norm returns the specified norm of the receiver. Valid norms are: +// +// 1 - The maximum absolute column sum +// 2 - The Frobenius norm, the square root of the sum of the squares of the elements +// Inf - The maximum absolute row sum +// +// Norm will panic with ErrNormOrder if an illegal norm is specified and with +// ErrZeroLength if the matrix has zero size. +func (s *SymBandDense) Norm(norm float64) float64 { + if s.IsEmpty() { + panic(ErrZeroLength) + } + lnorm := normLapack(norm, false) + if lnorm == lapack.MaxColumnSum || lnorm == lapack.MaxRowSum { + work := getFloat64s(s.mat.N, false) + defer putFloat64s(work) + return lapack64.Lansb(lnorm, s.mat, work) + } + return lapack64.Lansb(lnorm, s.mat, nil) +} + +// Trace returns the trace of the matrix. +// +// Trace will panic with ErrZeroLength if the matrix has zero size. +func (s *SymBandDense) Trace() float64 { + if s.IsEmpty() { + panic(ErrZeroLength) + } + rb := s.RawSymBand() + var tr float64 + for i := 0; i < rb.N; i++ { + tr += rb.Data[i*rb.Stride] + } + return tr +} + +// MulVecTo computes S⋅x storing the result into dst. +func (s *SymBandDense) MulVecTo(dst *VecDense, _ bool, x Vector) { + n := s.mat.N + if x.Len() != n { + panic(ErrShape) + } + dst.reuseAsNonZeroed(n) + + xMat, _ := untransposeExtract(x) + if xVec, ok := xMat.(*VecDense); ok { + if dst != xVec { + dst.checkOverlap(xVec.mat) + blas64.Sbmv(1, s.mat, xVec.mat, 0, dst.mat) + } else { + xCopy := getVecDenseWorkspace(n, false) + xCopy.CloneFromVec(xVec) + blas64.Sbmv(1, s.mat, xCopy.mat, 0, dst.mat) + putVecDenseWorkspace(xCopy) + } + } else { + xCopy := getVecDenseWorkspace(n, false) + xCopy.CloneFromVec(x) + blas64.Sbmv(1, s.mat, xCopy.mat, 0, dst.mat) + putVecDenseWorkspace(xCopy) + } +} diff --git a/vendor/gonum.org/v1/gonum/mat/symmetric.go b/vendor/gonum.org/v1/gonum/mat/symmetric.go new file mode 100644 index 0000000000..e38e4c7b6f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/symmetric.go @@ -0,0 +1,698 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" + "gonum.org/v1/gonum/lapack/lapack64" +) + +var ( + symDense *SymDense + + _ Matrix = symDense + _ allMatrix = symDense + _ denseMatrix = symDense + _ Symmetric = symDense + _ RawSymmetricer = symDense + _ MutableSymmetric = symDense +) + +const badSymTriangle = "mat: blas64.Symmetric not upper" + +// SymDense is a symmetric matrix that uses dense storage. SymDense +// matrices are stored in the upper triangle. +type SymDense struct { + mat blas64.Symmetric + cap int +} + +// Symmetric represents a symmetric matrix (where the element at {i, j} equals +// the element at {j, i}). Symmetric matrices are always square. +type Symmetric interface { + Matrix + // SymmetricDim returns the number of rows/columns in the matrix. + SymmetricDim() int +} + +// A RawSymmetricer can return a view of itself as a BLAS Symmetric matrix. +type RawSymmetricer interface { + RawSymmetric() blas64.Symmetric +} + +// A MutableSymmetric can set elements of a symmetric matrix. 
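+//
+// Setting the {i, j} element also determines the {j, i} element. A sketch
+// with illustrative values:
+//
+//	s := NewSymDense(3, nil)
+//	s.SetSym(0, 2, 5)
+//	// now s.At(0, 2) == 5 and s.At(2, 0) == 5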
+type MutableSymmetric interface { + Symmetric + SetSym(i, j int, v float64) +} + +// NewSymDense creates a new Symmetric matrix with n rows and columns. If data == nil, +// a new slice is allocated for the backing slice. If len(data) == n*n, data is +// used as the backing slice, and changes to the elements of the returned SymDense +// will be reflected in data. If neither of these is true, NewSymDense will panic. +// NewSymDense will panic if n is zero. +// +// The data must be arranged in row-major order, i.e. the (i*c + j)-th +// element in the data slice is the {i, j}-th element in the matrix. +// Only the values in the upper triangular portion of the matrix are used. +func NewSymDense(n int, data []float64) *SymDense { + if n <= 0 { + if n == 0 { + panic(ErrZeroLength) + } + panic("mat: negative dimension") + } + if data != nil && n*n != len(data) { + panic(ErrShape) + } + if data == nil { + data = make([]float64, n*n) + } + return &SymDense{ + mat: blas64.Symmetric{ + N: n, + Stride: n, + Data: data, + Uplo: blas.Upper, + }, + cap: n, + } +} + +// Dims returns the number of rows and columns in the matrix. +func (s *SymDense) Dims() (r, c int) { + return s.mat.N, s.mat.N +} + +// Caps returns the number of rows and columns in the backing matrix. +func (s *SymDense) Caps() (r, c int) { + return s.cap, s.cap +} + +// T returns the receiver, the transpose of a symmetric matrix. +func (s *SymDense) T() Matrix { + return s +} + +// SymmetricDim implements the Symmetric interface and returns the number of rows +// and columns in the matrix. +func (s *SymDense) SymmetricDim() int { + return s.mat.N +} + +// RawSymmetric returns the matrix as a blas64.Symmetric. The returned +// value must be stored in upper triangular format. +func (s *SymDense) RawSymmetric() blas64.Symmetric { + return s.mat +} + +// SetRawSymmetric sets the underlying blas64.Symmetric used by the receiver. +// Changes to elements in the receiver following the call will be reflected +// in the input. +// +// The supplied Symmetric must use blas.Upper storage format. +func (s *SymDense) SetRawSymmetric(mat blas64.Symmetric) { + if mat.Uplo != blas.Upper { + panic(badSymTriangle) + } + s.cap = mat.N + s.mat = mat +} + +// Reset empties the matrix so that it can be reused as the +// receiver of a dimensionally restricted operation. +// +// Reset should not be used when the matrix shares backing data. +// See the Reseter interface for more information. +func (s *SymDense) Reset() { + // N and Stride must be zeroed in unison. + s.mat.N, s.mat.Stride = 0, 0 + s.mat.Data = s.mat.Data[:0] +} + +// ReuseAsSym changes the receiver if it IsEmpty() to be of size n×n. +// +// ReuseAsSym re-uses the backing data slice if it has sufficient capacity, +// otherwise a new slice is allocated. The backing data is zero on return. +// +// ReuseAsSym panics if the receiver is not empty, and panics if +// the input size is less than one. To empty the receiver for re-use, +// Reset should be used. +func (s *SymDense) ReuseAsSym(n int) { + if n <= 0 { + if n == 0 { + panic(ErrZeroLength) + } + panic(ErrNegativeDimension) + } + if !s.IsEmpty() { + panic(ErrReuseNonEmpty) + } + s.reuseAsZeroed(n) +} + +// Zero sets all of the matrix elements to zero. +func (s *SymDense) Zero() { + for i := 0; i < s.mat.N; i++ { + zero(s.mat.Data[i*s.mat.Stride+i : i*s.mat.Stride+s.mat.N]) + } +} + +// IsEmpty returns whether the receiver is empty. Empty matrices can be the +// receiver for size-restricted operations. The receiver can be emptied using +// Reset. 
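+//
+// A sketch of the reuse pattern (a and b are assumed to be equally sized
+// Symmetric values defined elsewhere):
+//
+//	var s SymDense // the zero value is empty
+//	s.AddSym(a, b) // and may therefore receive a sized result
+//	s.Reset()      // empty again, ready for a different size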
+func (s *SymDense) IsEmpty() bool {
+	// It must be the case that s.Dims() returns
+	// zeros in this case. See comment in Reset().
+	return s.mat.N == 0
+}
+
+// reuseAsNonZeroed resizes an empty matrix to an n×n matrix,
+// or checks that a non-empty matrix is n×n.
+func (s *SymDense) reuseAsNonZeroed(n int) {
+	// reuseAsNonZeroed must be kept in sync with reuseAsZeroed.
+	if n == 0 {
+		panic(ErrZeroLength)
+	}
+	if s.mat.N > s.cap {
+		// Panic as a string, not a mat.Error.
+		panic(badCap)
+	}
+	if s.IsEmpty() {
+		s.mat = blas64.Symmetric{
+			N:      n,
+			Stride: n,
+			Data:   use(s.mat.Data, n*n),
+			Uplo:   blas.Upper,
+		}
+		s.cap = n
+		return
+	}
+	if s.mat.Uplo != blas.Upper {
+		panic(badSymTriangle)
+	}
+	if s.mat.N != n {
+		panic(ErrShape)
+	}
+}
+
+// reuseAsZeroed resizes an empty matrix to an n×n matrix,
+// or checks that a non-empty matrix is n×n. It then zeros the
+// elements of the matrix.
+func (s *SymDense) reuseAsZeroed(n int) {
+	// reuseAsZeroed must be kept in sync with reuseAsNonZeroed.
+	if n == 0 {
+		panic(ErrZeroLength)
+	}
+	if s.mat.N > s.cap {
+		// Panic as a string, not a mat.Error.
+		panic(badCap)
+	}
+	if s.IsEmpty() {
+		s.mat = blas64.Symmetric{
+			N:      n,
+			Stride: n,
+			Data:   useZeroed(s.mat.Data, n*n),
+			Uplo:   blas.Upper,
+		}
+		s.cap = n
+		return
+	}
+	if s.mat.Uplo != blas.Upper {
+		panic(badSymTriangle)
+	}
+	if s.mat.N != n {
+		panic(ErrShape)
+	}
+	s.Zero()
+}
+
+func (s *SymDense) isolatedWorkspace(a Symmetric) (w *SymDense, restore func()) {
+	n := a.SymmetricDim()
+	if n == 0 {
+		panic(ErrZeroLength)
+	}
+	w = getSymDenseWorkspace(n, false)
+	return w, func() {
+		s.CopySym(w)
+		putSymDenseWorkspace(w)
+	}
+}
+
+// DiagView returns the diagonal as a matrix backed by the original data.
+func (s *SymDense) DiagView() Diagonal {
+	n := s.mat.N
+	return &DiagDense{
+		mat: blas64.Vector{
+			N:    n,
+			Inc:  s.mat.Stride + 1,
+			Data: s.mat.Data[:(n-1)*s.mat.Stride+n],
+		},
+	}
+}
+
+func (s *SymDense) AddSym(a, b Symmetric) {
+	n := a.SymmetricDim()
+	if n != b.SymmetricDim() {
+		panic(ErrShape)
+	}
+	s.reuseAsNonZeroed(n)
+
+	if a, ok := a.(RawSymmetricer); ok {
+		if b, ok := b.(RawSymmetricer); ok {
+			amat, bmat := a.RawSymmetric(), b.RawSymmetric()
+			if s != a {
+				s.checkOverlap(generalFromSymmetric(amat))
+			}
+			if s != b {
+				s.checkOverlap(generalFromSymmetric(bmat))
+			}
+			for i := 0; i < n; i++ {
+				btmp := bmat.Data[i*bmat.Stride+i : i*bmat.Stride+n]
+				stmp := s.mat.Data[i*s.mat.Stride+i : i*s.mat.Stride+n]
+				for j, v := range amat.Data[i*amat.Stride+i : i*amat.Stride+n] {
+					stmp[j] = v + btmp[j]
+				}
+			}
+			return
+		}
+	}
+
+	s.checkOverlapMatrix(a)
+	s.checkOverlapMatrix(b)
+	for i := 0; i < n; i++ {
+		stmp := s.mat.Data[i*s.mat.Stride : i*s.mat.Stride+n]
+		for j := i; j < n; j++ {
+			stmp[j] = a.At(i, j) + b.At(i, j)
+		}
+	}
+}
+
+func (s *SymDense) CopySym(a Symmetric) int {
+	n := a.SymmetricDim()
+	n = min(n, s.mat.N)
+	if n == 0 {
+		return 0
+	}
+	switch a := a.(type) {
+	case RawSymmetricer:
+		amat := a.RawSymmetric()
+		if amat.Uplo != blas.Upper {
+			panic(badSymTriangle)
+		}
+		for i := 0; i < n; i++ {
+			copy(s.mat.Data[i*s.mat.Stride+i:i*s.mat.Stride+n], amat.Data[i*amat.Stride+i:i*amat.Stride+n])
+		}
+	default:
+		for i := 0; i < n; i++ {
+			stmp := s.mat.Data[i*s.mat.Stride : i*s.mat.Stride+n]
+			for j := i; j < n; j++ {
+				stmp[j] = a.At(i, j)
+			}
+		}
+	}
+	return n
+}
+
+// SymRankOne performs a symmetric rank-one update to the matrix a with x,
+// which is treated as a column vector, and stores the result in the receiver
+//
+//	s = a + alpha * x * xᵀ
+func (s
*SymDense) SymRankOne(a Symmetric, alpha float64, x Vector) { + n := x.Len() + if a.SymmetricDim() != n { + panic(ErrShape) + } + s.reuseAsNonZeroed(n) + + if s != a { + if rs, ok := a.(RawSymmetricer); ok { + s.checkOverlap(generalFromSymmetric(rs.RawSymmetric())) + } + s.CopySym(a) + } + + xU, _ := untransposeExtract(x) + if rv, ok := xU.(*VecDense); ok { + r, c := xU.Dims() + xmat := rv.mat + s.checkOverlap(generalFromVector(xmat, r, c)) + blas64.Syr(alpha, xmat, s.mat) + return + } + + for i := 0; i < n; i++ { + for j := i; j < n; j++ { + s.set(i, j, s.at(i, j)+alpha*x.AtVec(i)*x.AtVec(j)) + } + } +} + +// SymRankK performs a symmetric rank-k update to the matrix a and stores the +// result into the receiver. If a is zero, see SymOuterK. +// +// s = a + alpha * x * x' +func (s *SymDense) SymRankK(a Symmetric, alpha float64, x Matrix) { + n := a.SymmetricDim() + r, _ := x.Dims() + if r != n { + panic(ErrShape) + } + xMat, aTrans := untransposeExtract(x) + var g blas64.General + if rm, ok := xMat.(*Dense); ok { + g = rm.mat + } else { + g = DenseCopyOf(x).mat + aTrans = false + } + if a != s { + if rs, ok := a.(RawSymmetricer); ok { + s.checkOverlap(generalFromSymmetric(rs.RawSymmetric())) + } + s.reuseAsNonZeroed(n) + s.CopySym(a) + } + t := blas.NoTrans + if aTrans { + t = blas.Trans + } + blas64.Syrk(t, alpha, g, 1, s.mat) +} + +// SymOuterK calculates the outer product of x with itself and stores +// the result into the receiver. It is equivalent to the matrix +// multiplication +// +// s = alpha * x * x'. +// +// In order to update an existing matrix, see SymRankOne. +func (s *SymDense) SymOuterK(alpha float64, x Matrix) { + n, _ := x.Dims() + switch { + case s.IsEmpty(): + s.mat = blas64.Symmetric{ + N: n, + Stride: n, + Data: useZeroed(s.mat.Data, n*n), + Uplo: blas.Upper, + } + s.cap = n + s.SymRankK(s, alpha, x) + case s.mat.Uplo != blas.Upper: + panic(badSymTriangle) + case s.mat.N == n: + if s == x { + w := getSymDenseWorkspace(n, true) + w.SymRankK(w, alpha, x) + s.CopySym(w) + putSymDenseWorkspace(w) + } else { + switch r := x.(type) { + case RawMatrixer: + s.checkOverlap(r.RawMatrix()) + case RawSymmetricer: + s.checkOverlap(generalFromSymmetric(r.RawSymmetric())) + case RawTriangular: + s.checkOverlap(generalFromTriangular(r.RawTriangular())) + } + // Only zero the upper triangle. 
+ for i := 0; i < n; i++ { + ri := i * s.mat.Stride + zero(s.mat.Data[ri+i : ri+n]) + } + s.SymRankK(s, alpha, x) + } + default: + panic(ErrShape) + } +} + +// RankTwo performs a symmetric rank-two update to the matrix a with the +// vectors x and y, which are treated as column vectors, and stores the +// result in the receiver +// +// m = a + alpha * (x * yᵀ + y * xᵀ) +func (s *SymDense) RankTwo(a Symmetric, alpha float64, x, y Vector) { + n := s.mat.N + if x.Len() != n { + panic(ErrShape) + } + if y.Len() != n { + panic(ErrShape) + } + + if s != a { + if rs, ok := a.(RawSymmetricer); ok { + s.checkOverlap(generalFromSymmetric(rs.RawSymmetric())) + } + } + + var xmat, ymat blas64.Vector + fast := true + xU, _ := untransposeExtract(x) + if rv, ok := xU.(*VecDense); ok { + r, c := xU.Dims() + xmat = rv.mat + s.checkOverlap(generalFromVector(xmat, r, c)) + } else { + fast = false + } + yU, _ := untransposeExtract(y) + if rv, ok := yU.(*VecDense); ok { + r, c := yU.Dims() + ymat = rv.mat + s.checkOverlap(generalFromVector(ymat, r, c)) + } else { + fast = false + } + + if s != a { + if rs, ok := a.(RawSymmetricer); ok { + s.checkOverlap(generalFromSymmetric(rs.RawSymmetric())) + } + s.reuseAsNonZeroed(n) + s.CopySym(a) + } + + if fast { + if s != a { + s.reuseAsNonZeroed(n) + s.CopySym(a) + } + blas64.Syr2(alpha, xmat, ymat, s.mat) + return + } + + for i := 0; i < n; i++ { + s.reuseAsNonZeroed(n) + for j := i; j < n; j++ { + s.set(i, j, a.At(i, j)+alpha*(x.AtVec(i)*y.AtVec(j)+y.AtVec(i)*x.AtVec(j))) + } + } +} + +// ScaleSym multiplies the elements of a by f, placing the result in the receiver. +func (s *SymDense) ScaleSym(f float64, a Symmetric) { + n := a.SymmetricDim() + s.reuseAsNonZeroed(n) + if a, ok := a.(RawSymmetricer); ok { + amat := a.RawSymmetric() + if s != a { + s.checkOverlap(generalFromSymmetric(amat)) + } + for i := 0; i < n; i++ { + for j := i; j < n; j++ { + s.mat.Data[i*s.mat.Stride+j] = f * amat.Data[i*amat.Stride+j] + } + } + return + } + for i := 0; i < n; i++ { + for j := i; j < n; j++ { + s.mat.Data[i*s.mat.Stride+j] = f * a.At(i, j) + } + } +} + +// SubsetSym extracts a subset of the rows and columns of the matrix a and stores +// the result in-place into the receiver. The resulting matrix size is +// len(set)×len(set). Specifically, at the conclusion of SubsetSym, +// s.At(i, j) equals a.At(set[i], set[j]). Note that the supplied set does not +// have to be a strict subset, dimension repeats are allowed. +func (s *SymDense) SubsetSym(a Symmetric, set []int) { + n := len(set) + na := a.SymmetricDim() + s.reuseAsNonZeroed(n) + var restore func() + if a == s { + s, restore = s.isolatedWorkspace(a) + defer restore() + } + + if a, ok := a.(RawSymmetricer); ok { + raw := a.RawSymmetric() + if s != a { + s.checkOverlap(generalFromSymmetric(raw)) + } + for i := 0; i < n; i++ { + ssub := s.mat.Data[i*s.mat.Stride : i*s.mat.Stride+n] + r := set[i] + rsub := raw.Data[r*raw.Stride : r*raw.Stride+na] + for j := i; j < n; j++ { + c := set[j] + if r <= c { + ssub[j] = rsub[c] + } else { + ssub[j] = raw.Data[c*raw.Stride+r] + } + } + } + return + } + for i := 0; i < n; i++ { + for j := i; j < n; j++ { + s.mat.Data[i*s.mat.Stride+j] = a.At(set[i], set[j]) + } + } +} + +// SliceSym returns a new Matrix that shares backing data with the receiver. +// The returned matrix starts at {i,i} of the receiver and extends k-i rows +// and columns. The final row and column in the resulting matrix is k-1. 
+// SliceSym panics with ErrIndexOutOfRange if the slice is outside the
+// capacity of the receiver.
+func (s *SymDense) SliceSym(i, k int) Symmetric {
+	return s.sliceSym(i, k)
+}
+
+func (s *SymDense) sliceSym(i, k int) *SymDense {
+	sz := s.cap
+	if i < 0 || sz < i || k < i || sz < k {
+		panic(ErrIndexOutOfRange)
+	}
+	v := *s
+	v.mat.Data = s.mat.Data[i*s.mat.Stride+i : (k-1)*s.mat.Stride+k]
+	v.mat.N = k - i
+	v.cap = s.cap - i
+	return &v
+}
+
+// Norm returns the specified norm of the receiver. Valid norms are:
+//
+//	1 - The maximum absolute column sum
+//	2 - The Frobenius norm, the square root of the sum of the squares of the elements
+//	Inf - The maximum absolute row sum
+//
+// Norm will panic with ErrNormOrder if an illegal norm is specified and with
+// ErrZeroLength if the matrix has zero size.
+func (s *SymDense) Norm(norm float64) float64 {
+	if s.IsEmpty() {
+		panic(ErrZeroLength)
+	}
+	lnorm := normLapack(norm, false)
+	if lnorm == lapack.MaxColumnSum || lnorm == lapack.MaxRowSum {
+		work := getFloat64s(s.mat.N, false)
+		defer putFloat64s(work)
+		return lapack64.Lansy(lnorm, s.mat, work)
+	}
+	return lapack64.Lansy(lnorm, s.mat, nil)
+}
+
+// Trace returns the trace of the matrix.
+//
+// Trace will panic with ErrZeroLength if the matrix has zero size.
+func (s *SymDense) Trace() float64 {
+	if s.IsEmpty() {
+		panic(ErrZeroLength)
+	}
+	// TODO(btracey): could use internal asm sum routine.
+	var v float64
+	for i := 0; i < s.mat.N; i++ {
+		v += s.mat.Data[i*s.mat.Stride+i]
+	}
+	return v
+}
+
+// GrowSym returns the receiver expanded by n rows and n columns. If the
+// dimensions of the expanded matrix are outside the capacity of the receiver
+// a new allocation is made, otherwise not. Note that the receiver itself is
+// not modified during the call to GrowSym.
+func (s *SymDense) GrowSym(n int) Symmetric {
+	if n < 0 {
+		panic(ErrIndexOutOfRange)
+	}
+	if n == 0 {
+		return s
+	}
+	var v SymDense
+	n += s.mat.N
+	if s.IsEmpty() || n > s.cap {
+		v.mat = blas64.Symmetric{
+			N:      n,
+			Stride: n,
+			Uplo:   blas.Upper,
+			Data:   make([]float64, n*n),
+		}
+		v.cap = n
+		// Copy elements, including those not currently visible. Use a temporary
+		// structure to avoid modifying the receiver.
+		var tmp SymDense
+		tmp.mat = blas64.Symmetric{
+			N:      s.cap,
+			Stride: s.mat.Stride,
+			Data:   s.mat.Data,
+			Uplo:   s.mat.Uplo,
+		}
+		tmp.cap = s.cap
+		v.CopySym(&tmp)
+		return &v
+	}
+	v.mat = blas64.Symmetric{
+		N:      n,
+		Stride: s.mat.Stride,
+		Uplo:   blas.Upper,
+		Data:   s.mat.Data[:(n-1)*s.mat.Stride+n],
+	}
+	v.cap = s.cap
+	return &v
+}
+
+// PowPSD computes a^pow where a is a symmetric positive definite matrix.
+//
+// PowPSD returns an error if the matrix is not symmetric positive definite
+// or the Eigen decomposition is not successful.
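+//
+// A sketch (illustrative values): the matrix square root of a symmetric
+// positive definite matrix.
+//
+//	a := NewSymDense(2, []float64{2, 1, 1, 2})
+//	var root SymDense
+//	if err := root.PowPSD(a, 0.5); err != nil {
+//		// ErrFailedEigen or ErrNotPSD
+//	}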
+func (s *SymDense) PowPSD(a Symmetric, pow float64) error { + dim := a.SymmetricDim() + s.reuseAsNonZeroed(dim) + + var eigen EigenSym + ok := eigen.Factorize(a, true) + if !ok { + return ErrFailedEigen + } + values := eigen.Values(nil) + for i, v := range values { + if v <= 0 { + return ErrNotPSD + } + values[i] = math.Pow(v, pow) + } + var u Dense + eigen.VectorsTo(&u) + + s.SymOuterK(values[0], u.ColView(0)) + + var v VecDense + for i := 1; i < dim; i++ { + v.ColViewOf(&u, i) + s.SymRankOne(s, values[i], &v) + } + return nil +} diff --git a/vendor/gonum.org/v1/gonum/mat/triangular.go b/vendor/gonum.org/v1/gonum/mat/triangular.go new file mode 100644 index 0000000000..0e37fb0102 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/triangular.go @@ -0,0 +1,832 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" + "gonum.org/v1/gonum/lapack/lapack64" +) + +var ( + triDense *TriDense + _ Matrix = triDense + _ allMatrix = triDense + _ denseMatrix = triDense + _ Triangular = triDense + _ RawTriangular = triDense + _ MutableTriangular = triDense + + _ NonZeroDoer = triDense + _ RowNonZeroDoer = triDense + _ ColNonZeroDoer = triDense +) + +// TriDense represents an upper or lower triangular matrix in dense storage +// format. +type TriDense struct { + mat blas64.Triangular + cap int +} + +// Triangular represents a triangular matrix. Triangular matrices are always square. +type Triangular interface { + Matrix + // Triangle returns the number of rows/columns in the matrix and its + // orientation. + Triangle() (n int, kind TriKind) + + // TTri is the equivalent of the T() method in the Matrix interface but + // guarantees the transpose is of triangular type. + TTri() Triangular +} + +// A RawTriangular can return a blas64.Triangular representation of the receiver. +// Changes to the blas64.Triangular.Data slice will be reflected in the original +// matrix, changes to the N, Stride, Uplo and Diag fields will not. +type RawTriangular interface { + RawTriangular() blas64.Triangular +} + +// A MutableTriangular can set elements of a triangular matrix. +type MutableTriangular interface { + Triangular + SetTri(i, j int, v float64) +} + +var ( + _ Matrix = TransposeTri{} + _ Triangular = TransposeTri{} + _ UntransposeTrier = TransposeTri{} +) + +// TransposeTri is a type for performing an implicit transpose of a Triangular +// matrix. It implements the Triangular interface, returning values from the +// transpose of the matrix within. +type TransposeTri struct { + Triangular Triangular +} + +// At returns the value of the element at row i and column j of the transposed +// matrix, that is, row j and column i of the Triangular field. +func (t TransposeTri) At(i, j int) float64 { + return t.Triangular.At(j, i) +} + +// Dims returns the dimensions of the transposed matrix. Triangular matrices are +// square and thus this is the same size as the original Triangular. +func (t TransposeTri) Dims() (r, c int) { + c, r = t.Triangular.Dims() + return r, c +} + +// T performs an implicit transpose by returning the Triangular field. +func (t TransposeTri) T() Matrix { + return t.Triangular +} + +// Triangle returns the number of rows/columns in the matrix and its orientation. 
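+//
+// Transposition flips the orientation. A sketch with illustrative values:
+//
+//	u := NewTriDense(3, Upper, nil)
+//	_, kind := TransposeTri{Triangular: u}.Triangle() // kind == Lower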
+func (t TransposeTri) Triangle() (int, TriKind) { + n, upper := t.Triangular.Triangle() + return n, !upper +} + +// TTri performs an implicit transpose by returning the Triangular field. +func (t TransposeTri) TTri() Triangular { + return t.Triangular +} + +// Untranspose returns the Triangular field. +func (t TransposeTri) Untranspose() Matrix { + return t.Triangular +} + +func (t TransposeTri) UntransposeTri() Triangular { + return t.Triangular +} + +// NewTriDense creates a new Triangular matrix with n rows and columns. If data == nil, +// a new slice is allocated for the backing slice. If len(data) == n*n, data is +// used as the backing slice, and changes to the elements of the returned TriDense +// will be reflected in data. If neither of these is true, NewTriDense will panic. +// NewTriDense will panic if n is zero. +// +// The data must be arranged in row-major order, i.e. the (i*c + j)-th +// element in the data slice is the {i, j}-th element in the matrix. +// Only the values in the triangular portion corresponding to kind are used. +func NewTriDense(n int, kind TriKind, data []float64) *TriDense { + if n <= 0 { + if n == 0 { + panic(ErrZeroLength) + } + panic("mat: negative dimension") + } + if data != nil && len(data) != n*n { + panic(ErrShape) + } + if data == nil { + data = make([]float64, n*n) + } + uplo := blas.Lower + if kind == Upper { + uplo = blas.Upper + } + return &TriDense{ + mat: blas64.Triangular{ + N: n, + Stride: n, + Data: data, + Uplo: uplo, + Diag: blas.NonUnit, + }, + cap: n, + } +} + +func (t *TriDense) Dims() (r, c int) { + return t.mat.N, t.mat.N +} + +// Triangle returns the dimension of t and its orientation. The returned +// orientation is only valid when n is not empty. +func (t *TriDense) Triangle() (n int, kind TriKind) { + return t.mat.N, t.triKind() +} + +func (t *TriDense) isUpper() bool { + return isUpperUplo(t.mat.Uplo) +} + +func (t *TriDense) triKind() TriKind { + return TriKind(isUpperUplo(t.mat.Uplo)) +} + +func isUpperUplo(u blas.Uplo) bool { + switch u { + case blas.Upper: + return true + case blas.Lower: + return false + default: + panic(badTriangle) + } +} + +// asSymBlas returns the receiver restructured as a blas64.Symmetric with the +// same backing memory. Panics if the receiver is unit. +// This returns a blas64.Symmetric and not a *SymDense because SymDense can only +// be upper triangular. +func (t *TriDense) asSymBlas() blas64.Symmetric { + if t.mat.Diag == blas.Unit { + panic("mat: cannot convert unit TriDense into blas64.Symmetric") + } + return blas64.Symmetric{ + N: t.mat.N, + Stride: t.mat.Stride, + Data: t.mat.Data, + Uplo: t.mat.Uplo, + } +} + +// T performs an implicit transpose by returning the receiver inside a Transpose. +func (t *TriDense) T() Matrix { + return Transpose{t} +} + +// TTri performs an implicit transpose by returning the receiver inside a TransposeTri. +func (t *TriDense) TTri() Triangular { + return TransposeTri{t} +} + +func (t *TriDense) RawTriangular() blas64.Triangular { + return t.mat +} + +// SetRawTriangular sets the underlying blas64.Triangular used by the receiver. +// Changes to elements in the receiver following the call will be reflected +// in the input. +// +// The supplied Triangular must not use blas.Unit storage format. 
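+//
+// A sketch of adopting existing BLAS storage (values illustrative only):
+//
+//	var t TriDense
+//	t.SetRawTriangular(blas64.Triangular{
+//		N: 2, Stride: 2, Uplo: blas.Upper, Diag: blas.NonUnit,
+//		Data: []float64{1, 2, 0, 3},
+//	})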
+func (t *TriDense) SetRawTriangular(mat blas64.Triangular) { + if mat.Diag == blas.Unit { + panic("mat: cannot set TriDense with Unit storage format") + } + t.cap = mat.N + t.mat = mat +} + +// Reset empties the matrix so that it can be reused as the +// receiver of a dimensionally restricted operation. +// +// Reset should not be used when the matrix shares backing data. +// See the Reseter interface for more information. +func (t *TriDense) Reset() { + // N and Stride must be zeroed in unison. + t.mat.N, t.mat.Stride = 0, 0 + // Defensively zero Uplo to ensure + // it is set correctly later. + t.mat.Uplo = 0 + t.mat.Data = t.mat.Data[:0] +} + +// Zero sets all of the matrix elements to zero. +func (t *TriDense) Zero() { + if t.isUpper() { + for i := 0; i < t.mat.N; i++ { + zero(t.mat.Data[i*t.mat.Stride+i : i*t.mat.Stride+t.mat.N]) + } + return + } + for i := 0; i < t.mat.N; i++ { + zero(t.mat.Data[i*t.mat.Stride : i*t.mat.Stride+i+1]) + } +} + +// IsEmpty returns whether the receiver is empty. Empty matrices can be the +// receiver for size-restricted operations. The receiver can be emptied using +// Reset. +func (t *TriDense) IsEmpty() bool { + // It must be the case that t.Dims() returns + // zeros in this case. See comment in Reset(). + return t.mat.Stride == 0 +} + +// untransposeTri untransposes a matrix if applicable. If a is an UntransposeTrier, then +// untransposeTri returns the underlying matrix and true. If it is not, then it returns +// the input matrix and false. +func untransposeTri(a Triangular) (Triangular, bool) { + if ut, ok := a.(UntransposeTrier); ok { + return ut.UntransposeTri(), true + } + return a, false +} + +// ReuseAsTri changes the receiver if it IsEmpty() to be of size n×n. +// +// ReuseAsTri re-uses the backing data slice if it has sufficient capacity, +// otherwise a new slice is allocated. The backing data is zero on return. +// +// ReuseAsTri panics if the receiver is not empty, and panics if +// the input size is less than one. To empty the receiver for re-use, +// Reset should be used. +func (t *TriDense) ReuseAsTri(n int, kind TriKind) { + if n <= 0 { + if n == 0 { + panic(ErrZeroLength) + } + panic(ErrNegativeDimension) + } + if !t.IsEmpty() { + panic(ErrReuseNonEmpty) + } + t.reuseAsZeroed(n, kind) +} + +// reuseAsNonZeroed resizes an empty receiver to an n×n triangular matrix with the given +// orientation. If the receiver is not empty, reuseAsNonZeroed checks that the receiver +// is the correct size and orientation. +func (t *TriDense) reuseAsNonZeroed(n int, kind TriKind) { + // reuseAsNonZeroed must be kept in sync with reuseAsZeroed. + if n == 0 { + panic(ErrZeroLength) + } + ul := blas.Lower + if kind == Upper { + ul = blas.Upper + } + if t.mat.N > t.cap { + // Panic as a string, not a mat.Error. + panic(badCap) + } + if t.IsEmpty() { + t.mat = blas64.Triangular{ + N: n, + Stride: n, + Diag: blas.NonUnit, + Data: use(t.mat.Data, n*n), + Uplo: ul, + } + t.cap = n + return + } + if t.mat.N != n { + panic(ErrShape) + } + if t.mat.Uplo != ul { + panic(ErrTriangle) + } +} + +// reuseAsZeroed resizes an empty receiver to an n×n triangular matrix with the given +// orientation. If the receiver is not empty, reuseAsZeroed checks that the receiver +// is the correct size and orientation. It then zeros out the matrix data. +func (t *TriDense) reuseAsZeroed(n int, kind TriKind) { + // reuseAsZeroed must be kept in sync with reuseAsNonZeroed. 
+ if n == 0 { + panic(ErrZeroLength) + } + ul := blas.Lower + if kind == Upper { + ul = blas.Upper + } + if t.mat.N > t.cap { + // Panic as a string, not a mat.Error. + panic(badCap) + } + if t.IsEmpty() { + t.mat = blas64.Triangular{ + N: n, + Stride: n, + Diag: blas.NonUnit, + Data: useZeroed(t.mat.Data, n*n), + Uplo: ul, + } + t.cap = n + return + } + if t.mat.N != n { + panic(ErrShape) + } + if t.mat.Uplo != ul { + panic(ErrTriangle) + } + t.Zero() +} + +// isolatedWorkspace returns a new TriDense matrix w with the size of a and +// returns a callback to defer which performs cleanup at the return of the call. +// This should be used when a method receiver is the same pointer as an input argument. +func (t *TriDense) isolatedWorkspace(a Triangular) (w *TriDense, restore func()) { + n, kind := a.Triangle() + if n == 0 { + panic(ErrZeroLength) + } + w = getTriDenseWorkspace(n, kind, false) + return w, func() { + t.Copy(w) + putTriWorkspace(w) + } +} + +// DiagView returns the diagonal as a matrix backed by the original data. +func (t *TriDense) DiagView() Diagonal { + if t.mat.Diag == blas.Unit { + panic("mat: cannot take view of Unit diagonal") + } + n := t.mat.N + return &DiagDense{ + mat: blas64.Vector{ + N: n, + Inc: t.mat.Stride + 1, + Data: t.mat.Data[:(n-1)*t.mat.Stride+n], + }, + } +} + +// Copy makes a copy of elements of a into the receiver. It is similar to the +// built-in copy; it copies as much as the overlap between the two matrices and +// returns the number of rows and columns it copied. Only elements within the +// receiver's non-zero triangle are set. +// +// See the Copier interface for more information. +func (t *TriDense) Copy(a Matrix) (r, c int) { + r, c = a.Dims() + r = min(r, t.mat.N) + c = min(c, t.mat.N) + if r == 0 || c == 0 { + return 0, 0 + } + + switch a := a.(type) { + case RawMatrixer: + amat := a.RawMatrix() + if t.isUpper() { + for i := 0; i < r; i++ { + copy(t.mat.Data[i*t.mat.Stride+i:i*t.mat.Stride+c], amat.Data[i*amat.Stride+i:i*amat.Stride+c]) + } + } else { + for i := 0; i < r; i++ { + copy(t.mat.Data[i*t.mat.Stride:i*t.mat.Stride+i+1], amat.Data[i*amat.Stride:i*amat.Stride+i+1]) + } + } + case RawTriangular: + amat := a.RawTriangular() + aIsUpper := isUpperUplo(amat.Uplo) + tIsUpper := t.isUpper() + switch { + case tIsUpper && aIsUpper: + for i := 0; i < r; i++ { + copy(t.mat.Data[i*t.mat.Stride+i:i*t.mat.Stride+c], amat.Data[i*amat.Stride+i:i*amat.Stride+c]) + } + case !tIsUpper && !aIsUpper: + for i := 0; i < r; i++ { + copy(t.mat.Data[i*t.mat.Stride:i*t.mat.Stride+i+1], amat.Data[i*amat.Stride:i*amat.Stride+i+1]) + } + default: + for i := 0; i < r; i++ { + t.set(i, i, amat.Data[i*amat.Stride+i]) + } + } + default: + isUpper := t.isUpper() + for i := 0; i < r; i++ { + if isUpper { + for j := i; j < c; j++ { + t.set(i, j, a.At(i, j)) + } + } else { + for j := 0; j <= i; j++ { + t.set(i, j, a.At(i, j)) + } + } + } + } + + return r, c +} + +// InverseTri computes the inverse of the triangular matrix a, storing the result +// into the receiver. If a is ill-conditioned, a Condition error will be returned. +// Note that matrix inversion is numerically unstable, and should generally be +// avoided where possible, for example by using the Solve routines. 
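+//
+// A sketch, assuming a TriDense a defined elsewhere:
+//
+//	var ai TriDense
+//	if err := ai.InverseTri(a); err != nil {
+//		// A Condition error flags an ill-conditioned input.
+//	}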
+func (t *TriDense) InverseTri(a Triangular) error { + t.checkOverlapMatrix(a) + n, _ := a.Triangle() + t.reuseAsNonZeroed(a.Triangle()) + t.Copy(a) + work := getFloat64s(3*n, false) + iwork := getInts(n, false) + cond := lapack64.Trcon(CondNorm, t.mat, work, iwork) + putFloat64s(work) + putInts(iwork) + if math.IsInf(cond, 1) { + return Condition(cond) + } + ok := lapack64.Trtri(t.mat) + if !ok { + return Condition(math.Inf(1)) + } + if cond > ConditionTolerance { + return Condition(cond) + } + return nil +} + +// MulTri takes the product of triangular matrices a and b and places the result +// in the receiver. The size of a and b must match, and they both must have the +// same TriKind, or Mul will panic. +func (t *TriDense) MulTri(a, b Triangular) { + n, kind := a.Triangle() + nb, kindb := b.Triangle() + if n != nb { + panic(ErrShape) + } + if kind != kindb { + panic(ErrTriangle) + } + + aU, _ := untransposeTri(a) + bU, _ := untransposeTri(b) + t.checkOverlapMatrix(bU) + t.checkOverlapMatrix(aU) + t.reuseAsNonZeroed(n, kind) + var restore func() + if t == aU { + t, restore = t.isolatedWorkspace(aU) + defer restore() + } else if t == bU { + t, restore = t.isolatedWorkspace(bU) + defer restore() + } + + // Inspect types here, helps keep the loops later clean(er). + _, aDiag := aU.(Diagonal) + _, bDiag := bU.(Diagonal) + // If they are both diagonal only need 1 loop. + // All diagonal matrices are Upper. + // TODO: Add fast paths for DiagDense. + if aDiag && bDiag { + t.Zero() + for i := 0; i < n; i++ { + t.SetTri(i, i, a.At(i, i)*b.At(i, i)) + } + return + } + + // Now we know at least one matrix is non-diagonal. + // And all diagonal matrices are all Upper. + // The both-diagonal case is handled above. + // TODO: Add fast paths for Dense variants. + if kind == Upper { + for i := 0; i < n; i++ { + for j := i; j < n; j++ { + switch { + case aDiag: + t.SetTri(i, j, a.At(i, i)*b.At(i, j)) + case bDiag: + t.SetTri(i, j, a.At(i, j)*b.At(j, j)) + default: + var v float64 + for k := i; k <= j; k++ { + v += a.At(i, k) * b.At(k, j) + } + t.SetTri(i, j, v) + } + } + } + return + } + for i := 0; i < n; i++ { + for j := 0; j <= i; j++ { + var v float64 + for k := j; k <= i; k++ { + v += a.At(i, k) * b.At(k, j) + } + t.SetTri(i, j, v) + } + } +} + +// ScaleTri multiplies the elements of a by f, placing the result in the receiver. +// If the receiver is non-zero, the size and kind of the receiver must match +// the input, or ScaleTri will panic. +func (t *TriDense) ScaleTri(f float64, a Triangular) { + n, kind := a.Triangle() + t.reuseAsNonZeroed(n, kind) + + // TODO(btracey): Improve the set of fast-paths. + switch a := a.(type) { + case RawTriangular: + amat := a.RawTriangular() + if t != a { + t.checkOverlap(generalFromTriangular(amat)) + } + if kind == Upper { + for i := 0; i < n; i++ { + ts := t.mat.Data[i*t.mat.Stride+i : i*t.mat.Stride+n] + as := amat.Data[i*amat.Stride+i : i*amat.Stride+n] + for i, v := range as { + ts[i] = v * f + } + } + return + } + for i := 0; i < n; i++ { + ts := t.mat.Data[i*t.mat.Stride : i*t.mat.Stride+i+1] + as := amat.Data[i*amat.Stride : i*amat.Stride+i+1] + for i, v := range as { + ts[i] = v * f + } + } + return + default: + t.checkOverlapMatrix(a) + isUpper := kind == Upper + for i := 0; i < n; i++ { + if isUpper { + for j := i; j < n; j++ { + t.set(i, j, f*a.At(i, j)) + } + } else { + for j := 0; j <= i; j++ { + t.set(i, j, f*a.At(i, j)) + } + } + } + } +} + +// SliceTri returns a new Triangular that shares backing data with the receiver. 
+// The returned matrix starts at {i,i} of the receiver and extends k-i rows and +// columns. The final row and column in the resulting matrix is k-1. +// SliceTri panics with ErrIndexOutOfRange if the slice is outside the capacity +// of the receiver. +func (t *TriDense) SliceTri(i, k int) Triangular { + return t.sliceTri(i, k) +} + +func (t *TriDense) sliceTri(i, k int) *TriDense { + if i < 0 || t.cap < i || k < i || t.cap < k { + panic(ErrIndexOutOfRange) + } + v := *t + v.mat.Data = t.mat.Data[i*t.mat.Stride+i : (k-1)*t.mat.Stride+k] + v.mat.N = k - i + v.cap = t.cap - i + return &v +} + +// Norm returns the specified norm of the receiver. Valid norms are: +// +// 1 - The maximum absolute column sum +// 2 - The Frobenius norm, the square root of the sum of the squares of the elements +// Inf - The maximum absolute row sum +// +// Norm will panic with ErrNormOrder if an illegal norm is specified and with +// ErrZeroLength if the matrix has zero size. +func (t *TriDense) Norm(norm float64) float64 { + if t.IsEmpty() { + panic(ErrZeroLength) + } + lnorm := normLapack(norm, false) + if lnorm == lapack.MaxColumnSum { + work := getFloat64s(t.mat.N, false) + defer putFloat64s(work) + return lapack64.Lantr(lnorm, t.mat, work) + } + return lapack64.Lantr(lnorm, t.mat, nil) +} + +// Trace returns the trace of the matrix. +// +// Trace will panic with ErrZeroLength if the matrix has zero size. +func (t *TriDense) Trace() float64 { + if t.IsEmpty() { + panic(ErrZeroLength) + } + // TODO(btracey): could use internal asm sum routine. + var v float64 + for i := 0; i < t.mat.N; i++ { + v += t.mat.Data[i*t.mat.Stride+i] + } + return v +} + +// copySymIntoTriangle copies a symmetric matrix into a TriDense +func copySymIntoTriangle(t *TriDense, s Symmetric) { + n, upper := t.Triangle() + ns := s.SymmetricDim() + if n != ns { + panic("mat: triangle size mismatch") + } + ts := t.mat.Stride + if rs, ok := s.(RawSymmetricer); ok { + sd := rs.RawSymmetric() + ss := sd.Stride + if upper { + if sd.Uplo == blas.Upper { + for i := 0; i < n; i++ { + copy(t.mat.Data[i*ts+i:i*ts+n], sd.Data[i*ss+i:i*ss+n]) + } + return + } + for i := 0; i < n; i++ { + for j := i; j < n; j++ { + t.mat.Data[i*ts+j] = sd.Data[j*ss+i] + } + } + return + } + if sd.Uplo == blas.Upper { + for i := 0; i < n; i++ { + for j := 0; j <= i; j++ { + t.mat.Data[i*ts+j] = sd.Data[j*ss+i] + } + } + return + } + for i := 0; i < n; i++ { + copy(t.mat.Data[i*ts:i*ts+i+1], sd.Data[i*ss:i*ss+i+1]) + } + return + } + if upper { + for i := 0; i < n; i++ { + for j := i; j < n; j++ { + t.mat.Data[i*ts+j] = s.At(i, j) + } + } + return + } + for i := 0; i < n; i++ { + for j := 0; j <= i; j++ { + t.mat.Data[i*ts+j] = s.At(i, j) + } + } +} + +// DoNonZero calls the function fn for each of the non-zero elements of t. The function fn +// takes a row/column index and the element value of t at (i, j). +func (t *TriDense) DoNonZero(fn func(i, j int, v float64)) { + if t.isUpper() { + for i := 0; i < t.mat.N; i++ { + for j := i; j < t.mat.N; j++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } + return + } + for i := 0; i < t.mat.N; i++ { + for j := 0; j <= i; j++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } +} + +// DoRowNonZero calls the function fn for each of the non-zero elements of row i of t. The function fn +// takes a row/column index and the element value of t at (i, j). 
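+//
+// For example (an illustrative sketch; only the stored non-zero is visited):
+//
+//	t := NewTriDense(3, Lower, nil)
+//	t.SetTri(1, 0, 2.5)
+//	t.DoRowNonZero(1, func(i, j int, v float64) {
+//		// called once, with i=1, j=0, v=2.5
+//	})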
+func (t *TriDense) DoRowNonZero(i int, fn func(i, j int, v float64)) { + if i < 0 || t.mat.N <= i { + panic(ErrRowAccess) + } + if t.isUpper() { + for j := i; j < t.mat.N; j++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + return + } + for j := 0; j <= i; j++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } +} + +// DoColNonZero calls the function fn for each of the non-zero elements of column j of t. The function fn +// takes a row/column index and the element value of t at (i, j). +func (t *TriDense) DoColNonZero(j int, fn func(i, j int, v float64)) { + if j < 0 || t.mat.N <= j { + panic(ErrColAccess) + } + if t.isUpper() { + for i := 0; i <= j; i++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + return + } + for i := j; i < t.mat.N; i++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } +} + +// SolveTo solves a triangular system T * X = B or Tᵀ * X = B where T is an n×n +// triangular matrix represented by the receiver and B is a given n×nrhs matrix. +// If T is non-singular, the result will be stored into dst and nil will be +// returned. If T is singular, the contents of dst will be undefined and a +// Condition error will be returned. +// +// If dst is empty, SolveTo will resize it to n×nrhs. If dst is not empty, +// SolveTo will panic if dst is not n×nrhs. +func (t *TriDense) SolveTo(dst *Dense, trans bool, b Matrix) error { + n, nrhs := b.Dims() + if n != t.mat.N { + panic(ErrShape) + } + + dst.reuseAsNonZeroed(n, nrhs) + bU, bTrans := untranspose(b) + if dst == bU { + if bTrans { + work := getDenseWorkspace(n, nrhs, false) + defer putDenseWorkspace(work) + work.Copy(b) + dst.Copy(work) + } + } else { + if rm, ok := bU.(RawMatrixer); ok { + dst.checkOverlap(rm.RawMatrix()) + } + dst.Copy(b) + } + + transT := blas.NoTrans + if trans { + transT = blas.Trans + } + ok := lapack64.Trtrs(transT, t.mat, dst.mat) + if !ok { + return Condition(math.Inf(1)) + } + + work := getFloat64s(3*n, false) + iwork := getInts(n, false) + cond := lapack64.Trcon(CondNorm, t.mat, work, iwork) + putFloat64s(work) + putInts(iwork) + if cond > ConditionTolerance { + return Condition(cond) + } + + return nil +} diff --git a/vendor/gonum.org/v1/gonum/mat/triband.go b/vendor/gonum.org/v1/gonum/mat/triband.go new file mode 100644 index 0000000000..aa0b51d6f7 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/triband.go @@ -0,0 +1,694 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" + "gonum.org/v1/gonum/lapack/lapack64" +) + +var ( + triBand TriBanded + _ Banded = triBand + _ Triangular = triBand + + triBandDense *TriBandDense + _ Matrix = triBandDense + _ allMatrix = triBandDense + _ denseMatrix = triBandDense + _ Triangular = triBandDense + _ Banded = triBandDense + _ TriBanded = triBandDense + _ RawTriBander = triBandDense + _ MutableTriBanded = triBandDense +) + +// TriBanded is a triangular band matrix interface type. +type TriBanded interface { + Banded + + // Triangle returns the number of rows/columns in the matrix and its + // orientation. + Triangle() (n int, kind TriKind) + + // TTri is the equivalent of the T() method in the Matrix interface but + // guarantees the transpose is of triangular type. 
+ TTri() Triangular + + // TriBand returns the number of rows/columns in the matrix, the + // size of the bandwidth, and the orientation. + TriBand() (n, k int, kind TriKind) + + // TTriBand is the equivalent of the T() method in the Matrix interface but + // guarantees the transpose is of banded triangular type. + TTriBand() TriBanded +} + +// A RawTriBander can return a blas64.TriangularBand representation of the receiver. +// Changes to the blas64.TriangularBand.Data slice will be reflected in the original +// matrix, changes to the N, K, Stride, Uplo and Diag fields will not. +type RawTriBander interface { + RawTriBand() blas64.TriangularBand +} + +// MutableTriBanded is a triangular band matrix interface type that allows +// elements to be altered. +type MutableTriBanded interface { + TriBanded + SetTriBand(i, j int, v float64) +} + +var ( + tTriBand TransposeTriBand + _ Matrix = tTriBand + _ TriBanded = tTriBand + _ Untransposer = tTriBand + _ UntransposeTrier = tTriBand + _ UntransposeBander = tTriBand + _ UntransposeTriBander = tTriBand +) + +// TransposeTriBand is a type for performing an implicit transpose of a TriBanded +// matrix. It implements the TriBanded interface, returning values from the +// transpose of the matrix within. +type TransposeTriBand struct { + TriBanded TriBanded +} + +// At returns the value of the element at row i and column j of the transposed +// matrix, that is, row j and column i of the TriBanded field. +func (t TransposeTriBand) At(i, j int) float64 { + return t.TriBanded.At(j, i) +} + +// Dims returns the dimensions of the transposed matrix. TriBanded matrices are +// square and thus this is the same size as the original TriBanded. +func (t TransposeTriBand) Dims() (r, c int) { + c, r = t.TriBanded.Dims() + return r, c +} + +// T performs an implicit transpose by returning the TriBand field. +func (t TransposeTriBand) T() Matrix { + return t.TriBanded +} + +// Triangle returns the number of rows/columns in the matrix and its orientation. +func (t TransposeTriBand) Triangle() (int, TriKind) { + n, upper := t.TriBanded.Triangle() + return n, !upper +} + +// TTri performs an implicit transpose by returning the TriBand field. +func (t TransposeTriBand) TTri() Triangular { + return t.TriBanded +} + +// Bandwidth returns the upper and lower bandwidths of the matrix. +func (t TransposeTriBand) Bandwidth() (kl, ku int) { + kl, ku = t.TriBanded.Bandwidth() + return ku, kl +} + +// TBand performs an implicit transpose by returning the TriBand field. +func (t TransposeTriBand) TBand() Banded { + return t.TriBanded +} + +// TriBand returns the number of rows/columns in the matrix, the +// size of the bandwidth, and the orientation. +func (t TransposeTriBand) TriBand() (n, k int, kind TriKind) { + n, k, kind = t.TriBanded.TriBand() + return n, k, !kind +} + +// TTriBand performs an implicit transpose by returning the TriBand field. +func (t TransposeTriBand) TTriBand() TriBanded { + return t.TriBanded +} + +// Untranspose returns the Triangular field. +func (t TransposeTriBand) Untranspose() Matrix { + return t.TriBanded +} + +// UntransposeTri returns the underlying Triangular matrix. +func (t TransposeTriBand) UntransposeTri() Triangular { + return t.TriBanded +} + +// UntransposeBand returns the underlying Banded matrix. +func (t TransposeTriBand) UntransposeBand() Banded { + return t.TriBanded +} + +// UntransposeTriBand returns the underlying TriBanded matrix. 
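+//
+// A round-trip sketch (illustrative; assumes a concrete TriBandDense):
+//
+//	b := NewTriBandDense(4, 1, Upper, nil)
+//	bt := b.TTriBand()                              // implicit transpose; no data copied
+//	u := bt.(TransposeTriBand).UntransposeTriBand() // u is b again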
+func (t TransposeTriBand) UntransposeTriBand() TriBanded {
+	return t.TriBanded
+}
+
+// TriBandDense represents a triangular band matrix in dense storage format.
+type TriBandDense struct {
+	mat blas64.TriangularBand
+}
+
+// NewTriBandDense creates a new triangular banded matrix with n rows and columns,
+// k bands in the direction of the specified kind. If data == nil,
+// a new slice is allocated for the backing slice. If len(data) == n*(k+1),
+// data is used as the backing slice, and changes to the elements of the returned
+// TriBandDense will be reflected in data. If neither of these is true, NewTriBandDense
+// will panic. k must be at least zero and less than n, otherwise NewTriBandDense will panic.
+//
+// The data must be arranged in row-major order constructed by removing the zeros
+// from the rows outside the band and aligning the diagonals. For example, if
+// the upper-triangular banded matrix
+//
+//	1 2 3 0 0 0
+//	0 4 5 6 0 0
+//	0 0 7 8 9 0
+//	0 0 0 10 11 12
+//	0 0 0 0 13 14
+//	0 0 0 0 0 15
+//
+// becomes (* entries are never accessed)
+//
+//	1 2 3
+//	4 5 6
+//	7 8 9
+//	10 11 12
+//	13 14 *
+//	15 * *
+//
+// which is passed to NewTriBandDense as []float64{1, 2, ..., 15, *, *, *}
+// with k=2 and kind = mat.Upper.
+// The lower triangular banded matrix
+//
+//	1 0 0 0 0 0
+//	2 3 0 0 0 0
+//	4 5 6 0 0 0
+//	0 7 8 9 0 0
+//	0 0 10 11 12 0
+//	0 0 0 13 14 15
+//
+// becomes (* entries are never accessed)
+//
+//	* * 1
+//	* 2 3
+//	4 5 6
+//	7 8 9
+//	10 11 12
+//	13 14 15
+//
+// which is passed to NewTriBandDense as []float64{*, *, *, 1, 2, ..., 15}
+// with k=2 and kind = mat.Lower.
+// Only the values in the band portion of the matrix are used.
+func NewTriBandDense(n, k int, kind TriKind, data []float64) *TriBandDense {
+	if n <= 0 || k < 0 {
+		if n == 0 {
+			panic(ErrZeroLength)
+		}
+		panic(ErrNegativeDimension)
+	}
+	if k+1 > n {
+		panic(ErrBandwidth)
+	}
+	bc := k + 1
+	if data != nil && len(data) != n*bc {
+		panic(ErrShape)
+	}
+	if data == nil {
+		data = make([]float64, n*bc)
+	}
+	uplo := blas.Lower
+	if kind {
+		uplo = blas.Upper
+	}
+	return &TriBandDense{
+		mat: blas64.TriangularBand{
+			Uplo:   uplo,
+			Diag:   blas.NonUnit,
+			N:      n,
+			K:      k,
+			Data:   data,
+			Stride: bc,
+		},
+	}
+}
+
+// Dims returns the number of rows and columns in the matrix.
+func (t *TriBandDense) Dims() (r, c int) {
+	return t.mat.N, t.mat.N
+}
+
+// T performs an implicit transpose by returning the receiver inside a Transpose.
+func (t *TriBandDense) T() Matrix {
+	return Transpose{t}
+}
+
+// IsEmpty returns whether the receiver is empty. Empty matrices can be the
+// receiver for size-restricted operations. The receiver can be emptied using
+// Reset.
+func (t *TriBandDense) IsEmpty() bool {
+	// It must be the case that t.Dims() returns
+	// zeros in this case. See comment in Reset().
+	return t.mat.Stride == 0
+}
+
+// Reset empties the matrix so that it can be reused as the
+// receiver of a dimensionally restricted operation.
+//
+// Reset should not be used when the matrix shares backing data.
+// See the Reseter interface for more information.
+func (t *TriBandDense) Reset() {
+	t.mat.N = 0
+	t.mat.Stride = 0
+	t.mat.K = 0
+	t.mat.Data = t.mat.Data[:0]
+}
+
+// ReuseAsTriBand changes the receiver to be of size n×n, bandwidth k+1 and of
+// the given kind, re-using the backing data slice if it has sufficient capacity
+// and allocating a new slice otherwise. The backing data is zero on return.
+// +// The receiver must be empty, n must be positive and k must be non-negative and +// less than n, otherwise ReuseAsTriBand will panic. To empty the receiver for +// re-use, Reset should be used. +func (t *TriBandDense) ReuseAsTriBand(n, k int, kind TriKind) { + if n <= 0 || k < 0 { + if n == 0 { + panic(ErrZeroLength) + } + panic(ErrNegativeDimension) + } + if k+1 > n { + panic(ErrBandwidth) + } + if !t.IsEmpty() { + panic(ErrReuseNonEmpty) + } + t.reuseAsZeroed(n, k, kind) +} + +// reuseAsZeroed resizes an empty receiver to an n×n triangular band matrix with +// the given bandwidth and orientation. If the receiver is not empty, +// reuseAsZeroed checks that the receiver has the correct size, bandwidth and +// orientation. It then zeros out the matrix data. +func (t *TriBandDense) reuseAsZeroed(n, k int, kind TriKind) { + // reuseAsZeroed must be kept in sync with reuseAsNonZeroed. + if n == 0 { + panic(ErrZeroLength) + } + ul := blas.Lower + if kind == Upper { + ul = blas.Upper + } + if t.IsEmpty() { + t.mat = blas64.TriangularBand{ + Uplo: ul, + Diag: blas.NonUnit, + N: n, + K: k, + Data: useZeroed(t.mat.Data, n*(k+1)), + Stride: k + 1, + } + return + } + if t.mat.N != n || t.mat.K != k { + panic(ErrShape) + } + if t.mat.Uplo != ul { + panic(ErrTriangle) + } + t.Zero() +} + +// reuseAsNonZeroed resizes an empty receiver to an n×n triangular band matrix +// with the given bandwidth and orientation. If the receiver is not empty, +// reuseAsZeroed checks that the receiver has the correct size, bandwidth and +// orientation. +// +//lint:ignore U1000 This will be used later. +func (t *TriBandDense) reuseAsNonZeroed(n, k int, kind TriKind) { + // reuseAsNonZeroed must be kept in sync with reuseAsZeroed. + if n == 0 { + panic(ErrZeroLength) + } + ul := blas.Lower + if kind == Upper { + ul = blas.Upper + } + if t.IsEmpty() { + t.mat = blas64.TriangularBand{ + Uplo: ul, + Diag: blas.NonUnit, + N: n, + K: k, + Data: use(t.mat.Data, n*(k+1)), + Stride: k + 1, + } + return + } + if t.mat.N != n || t.mat.K != k { + panic(ErrShape) + } + if t.mat.Uplo != ul { + panic(ErrTriangle) + } +} + +// DoNonZero calls the function fn for each of the non-zero elements of t. The function fn +// takes a row/column index and the element value of t at (i, j). +func (t *TriBandDense) DoNonZero(fn func(i, j int, v float64)) { + if t.isUpper() { + for i := 0; i < t.mat.N; i++ { + for j := i; j < min(i+t.mat.K+1, t.mat.N); j++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } + } else { + for i := 0; i < t.mat.N; i++ { + for j := max(0, i-t.mat.K); j <= i; j++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } + } +} + +// DoRowNonZero calls the function fn for each of the non-zero elements of row i of t. The function fn +// takes a row/column index and the element value of t at (i, j). +func (t *TriBandDense) DoRowNonZero(i int, fn func(i, j int, v float64)) { + if i < 0 || t.mat.N <= i { + panic(ErrRowAccess) + } + if t.isUpper() { + for j := i; j < min(i+t.mat.K+1, t.mat.N); j++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } else { + for j := max(0, i-t.mat.K); j <= i; j++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } +} + +// DoColNonZero calls the function fn for each of the non-zero elements of column j of t. The function fn +// takes a row/column index and the element value of t at (i, j). 
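+//
+// For example (an illustrative sketch; only the stored non-zero is visited):
+//
+//	t := NewTriBandDense(4, 1, Upper, nil)
+//	t.SetTriBand(0, 1, 3)
+//	t.DoColNonZero(1, func(i, j int, v float64) {
+//		// called once, with i=0, j=1, v=3
+//	})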
+func (t *TriBandDense) DoColNonZero(j int, fn func(i, j int, v float64)) { + if j < 0 || t.mat.N <= j { + panic(ErrColAccess) + } + if t.isUpper() { + for i := 0; i < t.mat.N; i++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } else { + for i := 0; i < t.mat.N; i++ { + v := t.at(i, j) + if v != 0 { + fn(i, j, v) + } + } + } +} + +// Zero sets all of the matrix elements to zero. +func (t *TriBandDense) Zero() { + if t.isUpper() { + for i := 0; i < t.mat.N; i++ { + u := min(1+t.mat.K, t.mat.N-i) + zero(t.mat.Data[i*t.mat.Stride : i*t.mat.Stride+u]) + } + return + } + for i := 0; i < t.mat.N; i++ { + l := max(0, t.mat.K-i) + zero(t.mat.Data[i*t.mat.Stride+l : i*t.mat.Stride+t.mat.K+1]) + } +} + +func (t *TriBandDense) isUpper() bool { + return isUpperUplo(t.mat.Uplo) +} + +func (t *TriBandDense) triKind() TriKind { + return TriKind(isUpperUplo(t.mat.Uplo)) +} + +// Triangle returns the dimension of t and its orientation. The returned +// orientation is only valid when n is not zero. +func (t *TriBandDense) Triangle() (n int, kind TriKind) { + return t.mat.N, t.triKind() +} + +// TTri performs an implicit transpose by returning the receiver inside a TransposeTri. +func (t *TriBandDense) TTri() Triangular { + return TransposeTri{t} +} + +// Bandwidth returns the upper and lower bandwidths of the matrix. +func (t *TriBandDense) Bandwidth() (kl, ku int) { + if t.isUpper() { + return 0, t.mat.K + } + return t.mat.K, 0 +} + +// TBand performs an implicit transpose by returning the receiver inside a TransposeBand. +func (t *TriBandDense) TBand() Banded { + return TransposeBand{t} +} + +// TriBand returns the number of rows/columns in the matrix, the +// size of the bandwidth, and the orientation. +func (t *TriBandDense) TriBand() (n, k int, kind TriKind) { + return t.mat.N, t.mat.K, TriKind(!t.IsEmpty()) && t.triKind() +} + +// TTriBand performs an implicit transpose by returning the receiver inside a TransposeTriBand. +func (t *TriBandDense) TTriBand() TriBanded { + return TransposeTriBand{t} +} + +// RawTriBand returns the underlying blas64.TriangularBand used by the receiver. +// Changes to the blas64.TriangularBand.Data slice will be reflected in the original +// matrix, changes to the N, K, Stride, Uplo and Diag fields will not. +func (t *TriBandDense) RawTriBand() blas64.TriangularBand { + return t.mat +} + +// SetRawTriBand sets the underlying blas64.TriangularBand used by the receiver. +// Changes to elements in the receiver following the call will be reflected +// in the input. +// +// The supplied TriangularBand must not use blas.Unit storage format. +func (t *TriBandDense) SetRawTriBand(mat blas64.TriangularBand) { + if mat.Diag == blas.Unit { + panic("mat: cannot set TriBand with Unit storage") + } + t.mat = mat +} + +// DiagView returns the diagonal as a matrix backed by the original data. +func (t *TriBandDense) DiagView() Diagonal { + if t.mat.Diag == blas.Unit { + panic("mat: cannot take view of Unit diagonal") + } + n := t.mat.N + data := t.mat.Data + if !t.isUpper() { + data = data[t.mat.K:] + } + return &DiagDense{ + mat: blas64.Vector{ + N: n, + Inc: t.mat.Stride, + Data: data[:(n-1)*t.mat.Stride+1], + }, + } +} + +// Norm returns the specified norm of the receiver. 
Valid norms are: +// +// 1 - The maximum absolute column sum +// 2 - The Frobenius norm, the square root of the sum of the squares of the elements +// Inf - The maximum absolute row sum +// +// Norm will panic with ErrNormOrder if an illegal norm is specified and with +// ErrZeroLength if the matrix has zero size. +func (t *TriBandDense) Norm(norm float64) float64 { + if t.IsEmpty() { + panic(ErrZeroLength) + } + lnorm := normLapack(norm, false) + if lnorm == lapack.MaxColumnSum { + work := getFloat64s(t.mat.N, false) + defer putFloat64s(work) + return lapack64.Lantb(lnorm, t.mat, work) + } + return lapack64.Lantb(lnorm, t.mat, nil) +} + +// Trace returns the trace of the matrix. +// +// Trace will panic with ErrZeroLength if the matrix has zero size. +func (t *TriBandDense) Trace() float64 { + if t.IsEmpty() { + panic(ErrZeroLength) + } + rb := t.RawTriBand() + var tr float64 + var offsetIndex int + if rb.Uplo == blas.Lower { + offsetIndex = rb.K + } + for i := 0; i < rb.N; i++ { + tr += rb.Data[offsetIndex+i*rb.Stride] + } + return tr +} + +// SolveTo solves a triangular system T * X = B or Tᵀ * X = B where T is an +// n×n triangular band matrix represented by the receiver and B is a given +// n×nrhs matrix. If T is non-singular, the result will be stored into dst and +// nil will be returned. If T is singular, the contents of dst will be undefined +// and a Condition error will be returned. +func (t *TriBandDense) SolveTo(dst *Dense, trans bool, b Matrix) error { + n, nrhs := b.Dims() + if n != t.mat.N { + panic(ErrShape) + } + + dst.reuseAsNonZeroed(n, nrhs) + bU, bTrans := untranspose(b) + if dst == bU { + if bTrans { + work := getDenseWorkspace(n, nrhs, false) + defer putDenseWorkspace(work) + work.Copy(b) + dst.Copy(work) + } + } else { + if rm, ok := bU.(RawMatrixer); ok { + dst.checkOverlap(rm.RawMatrix()) + } + dst.Copy(b) + } + + var ok bool + if trans { + ok = lapack64.Tbtrs(blas.Trans, t.mat, dst.mat) + } else { + ok = lapack64.Tbtrs(blas.NoTrans, t.mat, dst.mat) + } + if !ok { + return Condition(math.Inf(1)) + } + return nil +} + +// SolveVecTo solves a triangular system T * x = b or Tᵀ * x = b where T is an +// n×n triangular band matrix represented by the receiver and b is a given +// n-vector. If T is non-singular, the result will be stored into dst and nil +// will be returned. If T is singular, the contents of dst will be undefined and +// a Condition error will be returned. +func (t *TriBandDense) SolveVecTo(dst *VecDense, trans bool, b Vector) error { + n, nrhs := b.Dims() + if n != t.mat.N || nrhs != 1 { + panic(ErrShape) + } + if b, ok := b.(RawVectorer); ok && dst != b { + dst.checkOverlap(b.RawVector()) + } + dst.reuseAsNonZeroed(n) + if dst != b { + dst.CopyVec(b) + } + var ok bool + if trans { + ok = lapack64.Tbtrs(blas.Trans, t.mat, dst.asGeneral()) + } else { + ok = lapack64.Tbtrs(blas.NoTrans, t.mat, dst.asGeneral()) + } + if !ok { + return Condition(math.Inf(1)) + } + return nil +} + +func copySymBandIntoTriBand(dst *TriBandDense, s SymBanded) { + n, k, upper := dst.TriBand() + ns, ks := s.SymBand() + if n != ns { + panic("mat: triangle size mismatch") + } + if k != ks { + panic("mat: triangle bandwidth mismatch") + } + + // TODO(vladimir-ch): implement the missing cases below as needed. + t := dst.mat + sU, _ := untransposeExtract(s) + if sbd, ok := sU.(*SymBandDense); ok { + s := sbd.RawSymBand() + if upper { + if s.Uplo == blas.Upper { + // dst is upper triangular, s is stored in upper triangle. 
+ for i := 0; i < n; i++ { + ilen := min(k+1, n-i) + copy(t.Data[i*t.Stride:i*t.Stride+ilen], s.Data[i*s.Stride:i*s.Stride+ilen]) + } + } else { + // dst is upper triangular, s is stored in lower triangle. + // + // The following is a possible implementation for this case but + // is commented out due to lack of test coverage. + // for i := 0; i < n; i++ { + // ilen := min(k+1, n-i) + // for j := 0; j < ilen; j++ { + // t.Data[i*t.Stride+j] = s.Data[(i+j)*s.Stride+k-j] + // } + // } + panic("not implemented") + } + } else { + if s.Uplo == blas.Upper { + // dst is lower triangular, s is stored in upper triangle. + panic("not implemented") + } else { + // dst is lower triangular, s is stored in lower triangle. + panic("not implemented") + } + } + return + } + if upper { + for i := 0; i < n; i++ { + ilen := min(k+1, n-i) + for j := 0; j < ilen; j++ { + t.Data[i*t.Stride+j] = s.At(i, i+j) + } + } + } else { + panic("not implemented") + } +} diff --git a/vendor/gonum.org/v1/gonum/mat/tridiag.go b/vendor/gonum.org/v1/gonum/mat/tridiag.go new file mode 100644 index 0000000000..c001d48631 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/tridiag.go @@ -0,0 +1,417 @@ +// Copyright ©2020 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/internal/asm/f64" + "gonum.org/v1/gonum/lapack/lapack64" +) + +var ( + tridiagDense *Tridiag + _ Matrix = tridiagDense + _ allMatrix = tridiagDense + _ denseMatrix = tridiagDense + _ Banded = tridiagDense + _ MutableBanded = tridiagDense + _ RawTridiagonaler = tridiagDense +) + +// A RawTridiagonaler can return a lapack64.Tridiagonal representation of the +// receiver. Changes to the elements of DL, D, DU in lapack64.Tridiagonal will +// be reflected in the original matrix, changes to the N field will not. +type RawTridiagonaler interface { + RawTridiagonal() lapack64.Tridiagonal +} + +// Tridiag represents a tridiagonal matrix by its three diagonals. +type Tridiag struct { + mat lapack64.Tridiagonal +} + +// NewTridiag creates a new n×n tridiagonal matrix with the first sub-diagonal +// in dl, the main diagonal in d and the first super-diagonal in du. If all of +// dl, d, and du are nil, new backing slices will be allocated for them. If dl +// and du have length n-1 and d has length n, they will be used as backing +// slices, and changes to the elements of the returned Tridiag will be reflected +// in dl, d, du. If neither of these is true, NewTridiag will panic. +func NewTridiag(n int, dl, d, du []float64) *Tridiag { + if n <= 0 { + if n == 0 { + panic(ErrZeroLength) + } + panic(ErrNegativeDimension) + } + if dl != nil || d != nil || du != nil { + if len(dl) != n-1 || len(d) != n || len(du) != n-1 { + panic(ErrShape) + } + } else { + d = make([]float64, n) + if n > 1 { + dl = make([]float64, n-1) + du = make([]float64, n-1) + } + } + return &Tridiag{ + mat: lapack64.Tridiagonal{ + N: n, + DL: dl, + D: d, + DU: du, + }, + } +} + +// Dims returns the number of rows and columns in the matrix. +func (a *Tridiag) Dims() (r, c int) { + return a.mat.N, a.mat.N +} + +// Bandwidth returns 1, 1 - the upper and lower bandwidths of the matrix. +func (a *Tridiag) Bandwidth() (kl, ku int) { + return 1, 1 +} + +// T performs an implicit transpose by returning the receiver inside a Transpose. 
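+//
+// For example (an illustrative sketch):
+//
+//	a := NewTridiag(3, []float64{7, 8}, []float64{4, 5, 6}, []float64{1, 2})
+//	at := a.T() // a 3×3 view of a with the sub- and super-diagonals exchanged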
+func (a *Tridiag) T() Matrix { + // An alternative would be to return the receiver with DL,DU swapped; the + // untranspose function would then always return false. With Transpose the + // diagonal swapping will be done in tridiagonal routines in lapack like + // lapack64.Gtsv or gonum.Dlagtm based on the trans parameter. + return Transpose{a} +} + +// TBand performs an implicit transpose by returning the receiver inside a +// TransposeBand. +func (a *Tridiag) TBand() Banded { + // An alternative would be to return the receiver with DL,DU swapped; see + // explanation in T above. + return TransposeBand{a} +} + +// RawTridiagonal returns the underlying lapack64.Tridiagonal used by the +// receiver. Changes to elements in the receiver following the call will be +// reflected in the returned matrix. +func (a *Tridiag) RawTridiagonal() lapack64.Tridiagonal { + return a.mat +} + +// SetRawTridiagonal sets the underlying lapack64.Tridiagonal used by the +// receiver. Changes to elements in the receiver following the call will be +// reflected in the input. +func (a *Tridiag) SetRawTridiagonal(mat lapack64.Tridiagonal) { + a.mat = mat +} + +// IsEmpty returns whether the receiver is empty. Empty matrices can be the +// receiver for size-restricted operations. The receiver can be zeroed using +// Reset. +func (a *Tridiag) IsEmpty() bool { + return a.mat.N == 0 +} + +// Reset empties the matrix so that it can be reused as the receiver of a +// dimensionally restricted operation. +// +// Reset should not be used when the matrix shares backing data. See the Reseter +// interface for more information. +func (a *Tridiag) Reset() { + a.mat.N = 0 + a.mat.DL = a.mat.DL[:0] + a.mat.D = a.mat.D[:0] + a.mat.DU = a.mat.DU[:0] +} + +// CloneFromTridiag makes a copy of the input Tridiag into the receiver, +// overwriting the previous value of the receiver. CloneFromTridiag does not +// place any restrictions on receiver shape. +func (a *Tridiag) CloneFromTridiag(from *Tridiag) { + n := from.mat.N + switch n { + case 0: + panic(ErrZeroLength) + case 1: + a.mat = lapack64.Tridiagonal{ + N: 1, + DL: use(a.mat.DL, 0), + D: use(a.mat.D, 1), + DU: use(a.mat.DU, 0), + } + a.mat.D[0] = from.mat.D[0] + default: + a.mat = lapack64.Tridiagonal{ + N: n, + DL: use(a.mat.DL, n-1), + D: use(a.mat.D, n), + DU: use(a.mat.DU, n-1), + } + copy(a.mat.DL, from.mat.DL) + copy(a.mat.D, from.mat.D) + copy(a.mat.DU, from.mat.DU) + } +} + +// DiagView returns the diagonal as a matrix backed by the original data. +func (a *Tridiag) DiagView() Diagonal { + return &DiagDense{ + mat: blas64.Vector{ + N: a.mat.N, + Data: a.mat.D[:a.mat.N], + Inc: 1, + }, + } +} + +// Zero sets all of the matrix elements to zero. +func (a *Tridiag) Zero() { + zero(a.mat.DL) + zero(a.mat.D) + zero(a.mat.DU) +} + +// Trace returns the trace of the matrix. +// +// Trace will panic with ErrZeroLength if the matrix has zero size. +func (a *Tridiag) Trace() float64 { + if a.IsEmpty() { + panic(ErrZeroLength) + } + return f64.Sum(a.mat.D) +} + +// Norm returns the specified norm of the receiver. Valid norms are: +// +// 1 - The maximum absolute column sum +// 2 - The Frobenius norm, the square root of the sum of the squares of the elements +// Inf - The maximum absolute row sum +// +// Norm will panic with ErrNormOrder if an illegal norm is specified and with +// ErrZeroLength if the matrix has zero size. 
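+//
+// For example (an illustrative sketch):
+//
+//	a := NewTridiag(2, []float64{3}, []float64{1, 2}, []float64{4})
+//	frob := a.Norm(2) // sqrt(1²+2²+3²+4²) ≈ 5.48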
+func (a *Tridiag) Norm(norm float64) float64 { + if a.IsEmpty() { + panic(ErrZeroLength) + } + return lapack64.Langt(normLapack(norm, false), a.mat) +} + +// MulVecTo computes A⋅x or Aᵀ⋅x storing the result into dst. +func (a *Tridiag) MulVecTo(dst *VecDense, trans bool, x Vector) { + n := a.mat.N + if x.Len() != n { + panic(ErrShape) + } + dst.reuseAsNonZeroed(n) + t := blas.NoTrans + if trans { + t = blas.Trans + } + xMat, _ := untransposeExtract(x) + if xVec, ok := xMat.(*VecDense); ok && dst != xVec { + dst.checkOverlap(xVec.mat) + lapack64.Lagtm(t, 1, a.mat, xVec.asGeneral(), 0, dst.asGeneral()) + } else { + xCopy := getVecDenseWorkspace(n, false) + xCopy.CloneFromVec(x) + lapack64.Lagtm(t, 1, a.mat, xCopy.asGeneral(), 0, dst.asGeneral()) + putVecDenseWorkspace(xCopy) + } +} + +// SolveTo solves a tridiagonal system A⋅X = B or Aᵀ⋅X = B where A is an +// n×n tridiagonal matrix represented by the receiver and B is a given n×nrhs +// matrix. If A is non-singular, the result will be stored into dst and nil will +// be returned. If A is singular, the contents of dst will be undefined and a +// Condition error will be returned. +func (a *Tridiag) SolveTo(dst *Dense, trans bool, b Matrix) error { + n, nrhs := b.Dims() + if n != a.mat.N { + panic(ErrShape) + } + + dst.reuseAsNonZeroed(n, nrhs) + bU, bTrans := untranspose(b) + if dst == bU { + if bTrans { + work := getDenseWorkspace(n, nrhs, false) + defer putDenseWorkspace(work) + work.Copy(b) + dst.Copy(work) + } + } else { + if rm, ok := bU.(RawMatrixer); ok { + dst.checkOverlap(rm.RawMatrix()) + } + dst.Copy(b) + } + + var aCopy Tridiag + aCopy.CloneFromTridiag(a) + var ok bool + if trans { + ok = lapack64.Gtsv(blas.Trans, aCopy.mat, dst.mat) + } else { + ok = lapack64.Gtsv(blas.NoTrans, aCopy.mat, dst.mat) + } + if !ok { + return Condition(math.Inf(1)) + } + return nil +} + +// SolveVecTo solves a tridiagonal system A⋅X = B or Aᵀ⋅X = B where A is an +// n×n tridiagonal matrix represented by the receiver and b is a given n-vector. +// If A is non-singular, the result will be stored into dst and nil will be +// returned. If A is singular, the contents of dst will be undefined and a +// Condition error will be returned. +func (a *Tridiag) SolveVecTo(dst *VecDense, trans bool, b Vector) error { + n, nrhs := b.Dims() + if n != a.mat.N || nrhs != 1 { + panic(ErrShape) + } + if b, ok := b.(RawVectorer); ok && dst != b { + dst.checkOverlap(b.RawVector()) + } + dst.reuseAsNonZeroed(n) + if dst != b { + dst.CopyVec(b) + } + var aCopy Tridiag + aCopy.CloneFromTridiag(a) + var ok bool + if trans { + ok = lapack64.Gtsv(blas.Trans, aCopy.mat, dst.asGeneral()) + } else { + ok = lapack64.Gtsv(blas.NoTrans, aCopy.mat, dst.asGeneral()) + } + if !ok { + return Condition(math.Inf(1)) + } + return nil +} + +// DoNonZero calls the function fn for each of the non-zero elements of A. The +// function fn takes a row/column index and the element value of A at (i,j). +func (a *Tridiag) DoNonZero(fn func(i, j int, v float64)) { + for i, aij := range a.mat.DU { + if aij != 0 { + fn(i, i+1, aij) + } + } + for i, aii := range a.mat.D { + if aii != 0 { + fn(i, i, aii) + } + } + for i, aij := range a.mat.DL { + if aij != 0 { + fn(i+1, i, aij) + } + } +} + +// DoRowNonZero calls the function fn for each of the non-zero elements of row i +// of A. The function fn takes a row/column index and the element value of A at +// (i,j). 
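+//
+// For example (an illustrative sketch; DL holds the sub-diagonal and DU the
+// super-diagonal):
+//
+//	a := NewTridiag(3, []float64{7, 8}, []float64{4, 5, 6}, []float64{1, 2})
+//	a.DoRowNonZero(1, func(i, j int, v float64) {
+//		// visits (1,0)=7, (1,1)=5 and (1,2)=2
+//	})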
+func (a *Tridiag) DoRowNonZero(i int, fn func(i, j int, v float64)) { + n := a.mat.N + if uint(i) >= uint(n) { + panic(ErrRowAccess) + } + if n == 1 { + v := a.mat.D[0] + if v != 0 { + fn(0, 0, v) + } + return + } + switch i { + case 0: + v := a.mat.D[0] + if v != 0 { + fn(i, 0, v) + } + v = a.mat.DU[0] + if v != 0 { + fn(i, 1, v) + } + case n - 1: + v := a.mat.DL[n-2] + if v != 0 { + fn(n-1, n-2, v) + } + v = a.mat.D[n-1] + if v != 0 { + fn(n-1, n-1, v) + } + default: + v := a.mat.DL[i-1] + if v != 0 { + fn(i, i-1, v) + } + v = a.mat.D[i] + if v != 0 { + fn(i, i, v) + } + v = a.mat.DU[i] + if v != 0 { + fn(i, i+1, v) + } + } +} + +// DoColNonZero calls the function fn for each of the non-zero elements of +// column j of A. The function fn takes a row/column index and the element value +// of A at (i, j). +func (a *Tridiag) DoColNonZero(j int, fn func(i, j int, v float64)) { + n := a.mat.N + if uint(j) >= uint(n) { + panic(ErrColAccess) + } + if n == 1 { + v := a.mat.D[0] + if v != 0 { + fn(0, 0, v) + } + return + } + switch j { + case 0: + v := a.mat.D[0] + if v != 0 { + fn(0, 0, v) + } + v = a.mat.DL[0] + if v != 0 { + fn(1, 0, v) + } + case n - 1: + v := a.mat.DU[n-2] + if v != 0 { + fn(n-2, n-1, v) + } + v = a.mat.D[n-1] + if v != 0 { + fn(n-1, n-1, v) + } + default: + v := a.mat.DU[j-1] + if v != 0 { + fn(j-1, j, v) + } + v = a.mat.D[j] + if v != 0 { + fn(j, j, v) + } + v = a.mat.DL[j] + if v != 0 { + fn(j+1, j, v) + } + } +} diff --git a/vendor/gonum.org/v1/gonum/mat/vector.go b/vendor/gonum.org/v1/gonum/mat/vector.go new file mode 100644 index 0000000000..5c5d3ff749 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mat/vector.go @@ -0,0 +1,855 @@ +// Copyright ©2013 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mat + +import ( + "math" + + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/internal/asm/f64" +) + +var ( + vector *VecDense + + _ Matrix = vector + _ allMatrix = vector + _ Vector = vector + _ Reseter = vector + _ MutableVector = vector +) + +// Vector is a vector. +type Vector interface { + Matrix + AtVec(int) float64 + Len() int +} + +// A MutableVector can set elements of a vector. +type MutableVector interface { + Vector + SetVec(i int, v float64) +} + +// TransposeVec is a type for performing an implicit transpose of a Vector. +// It implements the Vector interface, returning values from the transpose +// of the vector within. +type TransposeVec struct { + Vector Vector +} + +// At returns the value of the element at row i and column j of the transposed +// matrix, that is, row j and column i of the Vector field. +func (t TransposeVec) At(i, j int) float64 { + return t.Vector.At(j, i) +} + +// AtVec returns the element at position i. It panics if i is out of bounds. +func (t TransposeVec) AtVec(i int) float64 { + return t.Vector.AtVec(i) +} + +// Dims returns the dimensions of the transposed vector. +func (t TransposeVec) Dims() (r, c int) { + c, r = t.Vector.Dims() + return r, c +} + +// T performs an implicit transpose by returning the Vector field. +func (t TransposeVec) T() Matrix { + return t.Vector +} + +// Len returns the number of columns in the vector. +func (t TransposeVec) Len() int { + return t.Vector.Len() +} + +// TVec performs an implicit transpose by returning the Vector field. +func (t TransposeVec) TVec() Vector { + return t.Vector +} + +// Untranspose returns the Vector field. 
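+//
+// A round-trip sketch (illustrative):
+//
+//	v := NewVecDense(3, []float64{1, 2, 3})
+//	vt := v.TVec()                          // a 1×3 row view; no data copied
+//	u := vt.(TransposeVec).UntransposeVec() // u is v again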
+func (t TransposeVec) Untranspose() Matrix { + return t.Vector +} + +func (t TransposeVec) UntransposeVec() Vector { + return t.Vector +} + +// VecDense represents a column vector. +type VecDense struct { + mat blas64.Vector + // A BLAS vector can have a negative increment, but allowing this + // in the mat type complicates a lot of code, and doesn't gain anything. + // VecDense must have positive increment in this package. +} + +// NewVecDense creates a new VecDense of length n. If data == nil, +// a new slice is allocated for the backing slice. If len(data) == n, data is +// used as the backing slice, and changes to the elements of the returned VecDense +// will be reflected in data. If neither of these is true, NewVecDense will panic. +// NewVecDense will panic if n is zero. +func NewVecDense(n int, data []float64) *VecDense { + if n <= 0 { + if n == 0 { + panic(ErrZeroLength) + } + panic("mat: negative dimension") + } + if len(data) != n && data != nil { + panic(ErrShape) + } + if data == nil { + data = make([]float64, n) + } + return &VecDense{ + mat: blas64.Vector{ + N: n, + Inc: 1, + Data: data, + }, + } +} + +// SliceVec returns a new Vector that shares backing data with the receiver. +// The returned matrix starts at i of the receiver and extends k-i elements. +// SliceVec panics with ErrIndexOutOfRange if the slice is outside the capacity +// of the receiver. +func (v *VecDense) SliceVec(i, k int) Vector { + return v.sliceVec(i, k) +} + +func (v *VecDense) sliceVec(i, k int) *VecDense { + if i < 0 || k <= i || v.Cap() < k { + panic(ErrIndexOutOfRange) + } + return &VecDense{ + mat: blas64.Vector{ + N: k - i, + Inc: v.mat.Inc, + Data: v.mat.Data[i*v.mat.Inc : (k-1)*v.mat.Inc+1], + }, + } +} + +// Dims returns the number of rows and columns in the matrix. Columns is always 1 +// for a non-Reset vector. +func (v *VecDense) Dims() (r, c int) { + if v.IsEmpty() { + return 0, 0 + } + return v.mat.N, 1 +} + +// Caps returns the number of rows and columns in the backing matrix. Columns is always 1 +// for a non-Reset vector. +func (v *VecDense) Caps() (r, c int) { + if v.IsEmpty() { + return 0, 0 + } + return v.Cap(), 1 +} + +// Len returns the length of the vector. +func (v *VecDense) Len() int { + return v.mat.N +} + +// Cap returns the capacity of the vector. +func (v *VecDense) Cap() int { + if v.IsEmpty() { + return 0 + } + return (cap(v.mat.Data)-1)/v.mat.Inc + 1 +} + +// T performs an implicit transpose by returning the receiver inside a Transpose. +func (v *VecDense) T() Matrix { + return Transpose{v} +} + +// TVec performs an implicit transpose by returning the receiver inside a TransposeVec. +func (v *VecDense) TVec() Vector { + return TransposeVec{v} +} + +// Reset empties the matrix so that it can be reused as the +// receiver of a dimensionally restricted operation. +// +// Reset should not be used when the matrix shares backing data. +// See the Reseter interface for more information. +func (v *VecDense) Reset() { + // No change of Inc or N to 0 may be + // made unless both are set to 0. + v.mat.Inc = 0 + v.mat.N = 0 + v.mat.Data = v.mat.Data[:0] +} + +// Zero sets all of the matrix elements to zero. +func (v *VecDense) Zero() { + for i := 0; i < v.mat.N; i++ { + v.mat.Data[v.mat.Inc*i] = 0 + } +} + +// CloneFromVec makes a copy of a into the receiver, overwriting the previous value +// of the receiver. 
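+//
+// For example (an illustrative sketch):
+//
+//	src := NewVecDense(3, []float64{1, 2, 3})
+//	var dst VecDense
+//	dst.CloneFromVec(src) // dst now owns an independent copy of the data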
+func (v *VecDense) CloneFromVec(a Vector) { + if v == a { + return + } + n := a.Len() + v.mat = blas64.Vector{ + N: n, + Inc: 1, + Data: use(v.mat.Data, n), + } + if r, ok := a.(RawVectorer); ok { + blas64.Copy(r.RawVector(), v.mat) + return + } + for i := 0; i < a.Len(); i++ { + v.setVec(i, a.AtVec(i)) + } +} + +// VecDenseCopyOf returns a newly allocated copy of the elements of a. +func VecDenseCopyOf(a Vector) *VecDense { + v := &VecDense{} + v.CloneFromVec(a) + return v +} + +// RawVector returns the underlying blas64.Vector used by the receiver. +// Changes to elements in the receiver following the call will be reflected +// in returned blas64.Vector. +func (v *VecDense) RawVector() blas64.Vector { + return v.mat +} + +// SetRawVector sets the underlying blas64.Vector used by the receiver. +// Changes to elements in the receiver following the call will be reflected +// in the input. +func (v *VecDense) SetRawVector(a blas64.Vector) { + v.mat = a +} + +// CopyVec makes a copy of elements of a into the receiver. It is similar to the +// built-in copy; it copies as much as the overlap between the two vectors and +// returns the number of elements it copied. +func (v *VecDense) CopyVec(a Vector) int { + n := min(v.Len(), a.Len()) + if v == a { + return n + } + if r, ok := a.(RawVectorer); ok { + src := r.RawVector() + src.N = n + dst := v.mat + dst.N = n + blas64.Copy(src, dst) + return n + } + for i := 0; i < n; i++ { + v.setVec(i, a.AtVec(i)) + } + return n +} + +// Norm returns the specified norm of the receiver. Valid norms are: +// +// 1 - The sum of the element magnitudes +// 2 - The Euclidean norm, the square root of the sum of the squares of the elements +// Inf - The maximum element magnitude +// +// Norm will panic with ErrNormOrder if an illegal norm is specified and with +// ErrZeroLength if the vector has zero size. +func (v *VecDense) Norm(norm float64) float64 { + if v.IsEmpty() { + panic(ErrZeroLength) + } + switch norm { + default: + panic(ErrNormOrder) + case 1: + return blas64.Asum(v.mat) + case 2: + return blas64.Nrm2(v.mat) + case math.Inf(1): + imax := blas64.Iamax(v.mat) + return math.Abs(v.at(imax)) + } +} + +// ScaleVec scales the vector a by alpha, placing the result in the receiver. +func (v *VecDense) ScaleVec(alpha float64, a Vector) { + n := a.Len() + + if v == a { + if v.mat.Inc == 1 { + f64.ScalUnitary(alpha, v.mat.Data) + return + } + f64.ScalInc(alpha, v.mat.Data, uintptr(n), uintptr(v.mat.Inc)) + return + } + + v.reuseAsNonZeroed(n) + + if rv, ok := a.(RawVectorer); ok { + mat := rv.RawVector() + v.checkOverlap(mat) + if v.mat.Inc == 1 && mat.Inc == 1 { + f64.ScalUnitaryTo(v.mat.Data, alpha, mat.Data) + return + } + f64.ScalIncTo(v.mat.Data, uintptr(v.mat.Inc), + alpha, mat.Data, uintptr(n), uintptr(mat.Inc)) + return + } + + for i := 0; i < n; i++ { + v.setVec(i, alpha*a.AtVec(i)) + } +} + +// AddScaledVec adds the vectors a and alpha*b, placing the result in the receiver. 
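+//
+// For example (an illustrative sketch):
+//
+//	a := NewVecDense(2, []float64{1, 1})
+//	b := NewVecDense(2, []float64{2, 3})
+//	var v VecDense
+//	v.AddScaledVec(a, 10, b) // v = a + 10*b = {21, 31}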
+func (v *VecDense) AddScaledVec(a Vector, alpha float64, b Vector) { + if alpha == 1 { + v.AddVec(a, b) + return + } + if alpha == -1 { + v.SubVec(a, b) + return + } + + ar := a.Len() + br := b.Len() + + if ar != br { + panic(ErrShape) + } + + var amat, bmat blas64.Vector + fast := true + aU, _ := untransposeExtract(a) + if rv, ok := aU.(*VecDense); ok { + amat = rv.mat + if v != a { + v.checkOverlap(amat) + } + } else { + fast = false + } + bU, _ := untransposeExtract(b) + if rv, ok := bU.(*VecDense); ok { + bmat = rv.mat + if v != b { + v.checkOverlap(bmat) + } + } else { + fast = false + } + + v.reuseAsNonZeroed(ar) + + switch { + case alpha == 0: // v <- a + if v == a { + return + } + v.CopyVec(a) + case v == a && v == b: // v <- v + alpha * v = (alpha + 1) * v + blas64.Scal(alpha+1, v.mat) + case !fast: // v <- a + alpha * b without blas64 support. + for i := 0; i < ar; i++ { + v.setVec(i, a.AtVec(i)+alpha*b.AtVec(i)) + } + case v == a && v != b: // v <- v + alpha * b + if v.mat.Inc == 1 && bmat.Inc == 1 { + // Fast path for a common case. + f64.AxpyUnitaryTo(v.mat.Data, alpha, bmat.Data, amat.Data) + } else { + f64.AxpyInc(alpha, bmat.Data, v.mat.Data, + uintptr(ar), uintptr(bmat.Inc), uintptr(v.mat.Inc), 0, 0) + } + default: // v <- a + alpha * b or v <- a + alpha * v + if v.mat.Inc == 1 && amat.Inc == 1 && bmat.Inc == 1 { + // Fast path for a common case. + f64.AxpyUnitaryTo(v.mat.Data, alpha, bmat.Data, amat.Data) + } else { + f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0, + alpha, bmat.Data, amat.Data, + uintptr(ar), uintptr(bmat.Inc), uintptr(amat.Inc), 0, 0) + } + } +} + +// AddVec adds the vectors a and b, placing the result in the receiver. +func (v *VecDense) AddVec(a, b Vector) { + ar := a.Len() + br := b.Len() + + if ar != br { + panic(ErrShape) + } + + v.reuseAsNonZeroed(ar) + + aU, _ := untransposeExtract(a) + bU, _ := untransposeExtract(b) + + if arv, ok := aU.(*VecDense); ok { + if brv, ok := bU.(*VecDense); ok { + amat := arv.mat + bmat := brv.mat + + if v != a { + v.checkOverlap(amat) + } + if v != b { + v.checkOverlap(bmat) + } + + if v.mat.Inc == 1 && amat.Inc == 1 && bmat.Inc == 1 { + // Fast path for a common case. + f64.AxpyUnitaryTo(v.mat.Data, 1, bmat.Data, amat.Data) + return + } + f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0, + 1, bmat.Data, amat.Data, + uintptr(ar), uintptr(bmat.Inc), uintptr(amat.Inc), 0, 0) + return + } + } + + for i := 0; i < ar; i++ { + v.setVec(i, a.AtVec(i)+b.AtVec(i)) + } +} + +// SubVec subtracts the vector b from a, placing the result in the receiver. +func (v *VecDense) SubVec(a, b Vector) { + ar := a.Len() + br := b.Len() + + if ar != br { + panic(ErrShape) + } + + v.reuseAsNonZeroed(ar) + + aU, _ := untransposeExtract(a) + bU, _ := untransposeExtract(b) + + if arv, ok := aU.(*VecDense); ok { + if brv, ok := bU.(*VecDense); ok { + amat := arv.mat + bmat := brv.mat + + if v != a { + v.checkOverlap(amat) + } + if v != b { + v.checkOverlap(bmat) + } + + if v.mat.Inc == 1 && amat.Inc == 1 && bmat.Inc == 1 { + // Fast path for a common case. + f64.AxpyUnitaryTo(v.mat.Data, -1, bmat.Data, amat.Data) + return + } + f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0, + -1, bmat.Data, amat.Data, + uintptr(ar), uintptr(bmat.Inc), uintptr(amat.Inc), 0, 0) + return + } + } + + for i := 0; i < ar; i++ { + v.setVec(i, a.AtVec(i)-b.AtVec(i)) + } +} + +// MulElemVec performs element-wise multiplication of a and b, placing the result +// in the receiver. 
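+//
+// For example (an illustrative sketch):
+//
+//	a := NewVecDense(2, []float64{2, 3})
+//	b := NewVecDense(2, []float64{4, 5})
+//	var v VecDense
+//	v.MulElemVec(a, b) // v = {8, 15}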
+func (v *VecDense) MulElemVec(a, b Vector) { + ar := a.Len() + br := b.Len() + + if ar != br { + panic(ErrShape) + } + + v.reuseAsNonZeroed(ar) + + aU, _ := untransposeExtract(a) + bU, _ := untransposeExtract(b) + + if arv, ok := aU.(*VecDense); ok { + if brv, ok := bU.(*VecDense); ok { + amat := arv.mat + bmat := brv.mat + + if v != a { + v.checkOverlap(amat) + } + if v != b { + v.checkOverlap(bmat) + } + + if v.mat.Inc == 1 && amat.Inc == 1 && bmat.Inc == 1 { + // Fast path for a common case. + for i, a := range amat.Data { + v.mat.Data[i] = a * bmat.Data[i] + } + return + } + var ia, ib int + for i := 0; i < ar; i++ { + v.setVec(i, amat.Data[ia]*bmat.Data[ib]) + ia += amat.Inc + ib += bmat.Inc + } + return + } + } + + for i := 0; i < ar; i++ { + v.setVec(i, a.AtVec(i)*b.AtVec(i)) + } +} + +// DivElemVec performs element-wise division of a by b, placing the result +// in the receiver. +func (v *VecDense) DivElemVec(a, b Vector) { + ar := a.Len() + br := b.Len() + + if ar != br { + panic(ErrShape) + } + + v.reuseAsNonZeroed(ar) + + aU, _ := untransposeExtract(a) + bU, _ := untransposeExtract(b) + + if arv, ok := aU.(*VecDense); ok { + if brv, ok := bU.(*VecDense); ok { + amat := arv.mat + bmat := brv.mat + + if v != a { + v.checkOverlap(amat) + } + if v != b { + v.checkOverlap(bmat) + } + + if v.mat.Inc == 1 && amat.Inc == 1 && bmat.Inc == 1 { + // Fast path for a common case. + for i, a := range amat.Data { + v.setVec(i, a/bmat.Data[i]) + } + return + } + var ia, ib int + for i := 0; i < ar; i++ { + v.setVec(i, amat.Data[ia]/bmat.Data[ib]) + ia += amat.Inc + ib += bmat.Inc + } + } + } + + for i := 0; i < ar; i++ { + v.setVec(i, a.AtVec(i)/b.AtVec(i)) + } +} + +// MulVec computes a * b. The result is stored into the receiver. +// MulVec panics if the number of columns in a does not equal the number of rows in b +// or if the number of columns in b does not equal 1. +func (v *VecDense) MulVec(a Matrix, b Vector) { + r, c := a.Dims() + br, bc := b.Dims() + if c != br || bc != 1 { + panic(ErrShape) + } + + aU, trans := untransposeExtract(a) + var bmat blas64.Vector + fast := true + bU, _ := untransposeExtract(b) + if rv, ok := bU.(*VecDense); ok { + bmat = rv.mat + if v != b { + v.checkOverlap(bmat) + } + } else { + fast = false + } + + v.reuseAsNonZeroed(r) + var restore func() + if v == aU { + v, restore = v.isolatedWorkspace(aU.(*VecDense)) + defer restore() + } else if v == b { + v, restore = v.isolatedWorkspace(b) + defer restore() + } + + // TODO(kortschak): Improve the non-fast paths. + switch aU := aU.(type) { + case Vector: + if b.Len() == 1 { + // {n,1} x {1,1} + v.ScaleVec(b.AtVec(0), aU) + return + } + + // {1,n} x {n,1} + if fast { + if rv, ok := aU.(*VecDense); ok { + amat := rv.mat + if v != aU { + v.checkOverlap(amat) + } + + if amat.Inc == 1 && bmat.Inc == 1 { + // Fast path for a common case. 
+ v.setVec(0, f64.DotUnitary(amat.Data, bmat.Data)) + return + } + v.setVec(0, f64.DotInc(amat.Data, bmat.Data, + uintptr(c), uintptr(amat.Inc), uintptr(bmat.Inc), 0, 0)) + return + } + } + var sum float64 + for i := 0; i < c; i++ { + sum += aU.AtVec(i) * b.AtVec(i) + } + v.setVec(0, sum) + return + case *SymBandDense: + if fast { + aU.checkOverlap(v.asGeneral()) + blas64.Sbmv(1, aU.mat, bmat, 0, v.mat) + return + } + case *SymDense: + if fast { + aU.checkOverlap(v.asGeneral()) + blas64.Symv(1, aU.mat, bmat, 0, v.mat) + return + } + case *TriDense: + if fast { + v.CopyVec(b) + aU.checkOverlap(v.asGeneral()) + ta := blas.NoTrans + if trans { + ta = blas.Trans + } + blas64.Trmv(ta, aU.mat, v.mat) + return + } + case *Dense: + if fast { + aU.checkOverlap(v.asGeneral()) + t := blas.NoTrans + if trans { + t = blas.Trans + } + blas64.Gemv(t, 1, aU.mat, bmat, 0, v.mat) + return + } + default: + if fast { + for i := 0; i < r; i++ { + var f float64 + for j := 0; j < c; j++ { + f += a.At(i, j) * bmat.Data[j*bmat.Inc] + } + v.setVec(i, f) + } + return + } + } + + for i := 0; i < r; i++ { + var f float64 + for j := 0; j < c; j++ { + f += a.At(i, j) * b.AtVec(j) + } + v.setVec(i, f) + } +} + +// ReuseAsVec changes the receiver if it IsEmpty() to be of size n×1. +// +// ReuseAsVec re-uses the backing data slice if it has sufficient capacity, +// otherwise a new slice is allocated. The backing data is zero on return. +// +// ReuseAsVec panics if the receiver is not empty, and panics if +// the input size is less than one. To empty the receiver for re-use, +// Reset should be used. +func (v *VecDense) ReuseAsVec(n int) { + if n <= 0 { + if n == 0 { + panic(ErrZeroLength) + } + panic(ErrNegativeDimension) + } + if !v.IsEmpty() { + panic(ErrReuseNonEmpty) + } + v.reuseAsZeroed(n) +} + +// reuseAsNonZeroed resizes an empty vector to a r×1 vector, +// or checks that a non-empty matrix is r×1. +func (v *VecDense) reuseAsNonZeroed(r int) { + // reuseAsNonZeroed must be kept in sync with reuseAsZeroed. + if r == 0 { + panic(ErrZeroLength) + } + if v.IsEmpty() { + v.mat = blas64.Vector{ + N: r, + Inc: 1, + Data: use(v.mat.Data, r), + } + return + } + if r != v.mat.N { + panic(ErrShape) + } +} + +// reuseAsZeroed resizes an empty vector to a r×1 vector, +// or checks that a non-empty matrix is r×1. +func (v *VecDense) reuseAsZeroed(r int) { + // reuseAsZeroed must be kept in sync with reuseAsNonZeroed. + if r == 0 { + panic(ErrZeroLength) + } + if v.IsEmpty() { + v.mat = blas64.Vector{ + N: r, + Inc: 1, + Data: useZeroed(v.mat.Data, r), + } + return + } + if r != v.mat.N { + panic(ErrShape) + } + v.Zero() +} + +// IsEmpty returns whether the receiver is empty. Empty matrices can be the +// receiver for size-restricted operations. The receiver can be emptied using +// Reset. +func (v *VecDense) IsEmpty() bool { + // It must be the case that v.Dims() returns + // zeros in this case. See comment in Reset(). + return v.mat.Inc == 0 +} + +func (v *VecDense) isolatedWorkspace(a Vector) (n *VecDense, restore func()) { + l := a.Len() + if l == 0 { + panic(ErrZeroLength) + } + n = getVecDenseWorkspace(l, false) + return n, func() { + v.CopyVec(n) + putVecDenseWorkspace(n) + } +} + +// asDense returns a Dense representation of the receiver with the same +// underlying data. +func (v *VecDense) asDense() *Dense { + return &Dense{ + mat: v.asGeneral(), + capRows: v.mat.N, + capCols: 1, + } +} + +// asGeneral returns a blas64.General representation of the receiver with the +// same underlying data. 
+func (v *VecDense) asGeneral() blas64.General {
+	return blas64.General{
+		Rows:   v.mat.N,
+		Cols:   1,
+		Stride: v.mat.Inc,
+		Data:   v.mat.Data,
+	}
+}
+
+// ColViewOf reflects the column j of the RawMatrixer m, into the receiver
+// backed by the same underlying data. The receiver must either be empty or
+// have length equal to the number of rows of m.
+func (v *VecDense) ColViewOf(m RawMatrixer, j int) {
+	rm := m.RawMatrix()
+
+	if j >= rm.Cols || j < 0 {
+		panic(ErrColAccess)
+	}
+	if !v.IsEmpty() && v.mat.N != rm.Rows {
+		panic(ErrShape)
+	}
+
+	v.mat.Inc = rm.Stride
+	v.mat.Data = rm.Data[j : (rm.Rows-1)*rm.Stride+j+1]
+	v.mat.N = rm.Rows
+}
+
+// RowViewOf reflects the row i of the RawMatrixer m, into the receiver
+// backed by the same underlying data. The receiver must either be
+// empty or have length equal to the number of columns of m.
+func (v *VecDense) RowViewOf(m RawMatrixer, i int) {
+	rm := m.RawMatrix()
+
+	if i >= rm.Rows || i < 0 {
+		panic(ErrRowAccess)
+	}
+	if !v.IsEmpty() && v.mat.N != rm.Cols {
+		panic(ErrShape)
+	}
+
+	v.mat.Inc = 1
+	v.mat.Data = rm.Data[i*rm.Stride : i*rm.Stride+rm.Cols]
+	v.mat.N = rm.Cols
+}
+
+// Permute rearranges the elements of the n-vector v in the receiver as
+// specified by the permutation p[0],p[1],...,p[n-1] of the integers 0,...,n-1.
+//
+// If inverse is false, the given permutation is applied:
+//
+//	v[p[i]] is moved to v[i] for i=0,1,...,n-1.
+//
+// If inverse is true, the inverse permutation is applied:
+//
+//	v[i] is moved to v[p[i]] for i=0,1,...,n-1.
+//
+// p must have length n, otherwise Permute will panic.
+func (v *VecDense) Permute(p []int, inverse bool) {
+	v.asDense().PermuteRows(p, inverse)
+}
diff --git a/vendor/gonum.org/v1/gonum/mathext/README.md b/vendor/gonum.org/v1/gonum/mathext/README.md
new file mode 100644
index 0000000000..9f462f11f4
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/README.md
@@ -0,0 +1,6 @@
+# mathext
+
+[![go.dev reference](https://pkg.go.dev/badge/gonum.org/v1/gonum/mathext)](https://pkg.go.dev/gonum.org/v1/gonum/mathext)
+[![GoDoc](https://godocs.io/gonum.org/v1/gonum/mathext?status.svg)](https://godocs.io/gonum.org/v1/gonum/mathext)
+
+Package mathext implements basic elementary functions not included in the Go standard library.
diff --git a/vendor/gonum.org/v1/gonum/mathext/airy.go b/vendor/gonum.org/v1/gonum/mathext/airy.go
new file mode 100644
index 0000000000..f2904b4766
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/airy.go
@@ -0,0 +1,41 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mathext
+
+import "gonum.org/v1/gonum/mathext/internal/amos"
+
+// AiryAi returns the value of the Airy function at z. The Airy function here,
+// Ai(z), is one of the two linearly independent solutions to
+//
+//	y′′ - y*z = 0.
+//
+// See http://mathworld.wolfram.com/AiryFunctions.html for more detailed information.
+func AiryAi(z complex128) complex128 {
+	// id specifies the order of the derivative to compute,
+	// 0 for the function itself and 1 for the derivative.
+	// kode specifies the scaling option. See the function
+	// documentation for the exact behavior.
+	id := 0
+	kode := 1
+	air, aii, _, _ := amos.Zairy(real(z), imag(z), id, kode)
+	return complex(air, aii)
+}
+
+// AiryAiDeriv returns the value of the derivative of the Airy function at z. The
+// Airy function here, Ai(z), is one of the two linearly independent solutions to
+//
+//	y′′ - y*z = 0.
+//
+// See http://mathworld.wolfram.com/AiryFunctions.html for more detailed information.
+func AiryAiDeriv(z complex128) complex128 {
+	// id specifies the order of the derivative to compute,
+	// 0 for the function itself and 1 for the derivative.
+	// kode specifies the scaling option. See the function
+	// documentation for the exact behavior.
+	id := 1
+	kode := 1
+	air, aii, _, _ := amos.Zairy(real(z), imag(z), id, kode)
+	return complex(air, aii)
+}
diff --git a/vendor/gonum.org/v1/gonum/mathext/beta.go b/vendor/gonum.org/v1/gonum/mathext/beta.go
new file mode 100644
index 0000000000..2df51f3ddc
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/beta.go
@@ -0,0 +1,40 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mathext
+
+import "gonum.org/v1/gonum/mathext/internal/gonum"
+
+// Beta returns the value of the complete beta function B(a, b). It is defined as
+//
+//	Γ(a)Γ(b) / Γ(a+b)
+//
+// Special cases are:
+//
+//	B(a,b) returns NaN if a or b is Inf
+//	B(a,b) returns NaN if a and b are 0
+//	B(a,b) returns NaN if a or b is NaN
+//	B(a,b) returns NaN if a or b is < 0
+//	B(a,b) returns +Inf if a xor b is 0.
+//
+// See http://mathworld.wolfram.com/BetaFunction.html for more detailed information.
+func Beta(a, b float64) float64 {
+	return gonum.Beta(a, b)
+}
+
+// Lbeta returns the natural logarithm of the complete beta function B(a,b).
+// Lbeta is defined as:
+//
+//	Ln(Γ(a)Γ(b)/Γ(a+b))
+//
+// Special cases are:
+//
+//	Lbeta(a,b) returns NaN if a or b is Inf
+//	Lbeta(a,b) returns NaN if a and b are 0
+//	Lbeta(a,b) returns NaN if a or b is NaN
+//	Lbeta(a,b) returns NaN if a or b is < 0
+//	Lbeta(a,b) returns +Inf if a xor b is 0.
+func Lbeta(a, b float64) float64 {
+	return gonum.Lbeta(a, b)
+}
diff --git a/vendor/gonum.org/v1/gonum/mathext/betainc.go b/vendor/gonum.org/v1/gonum/mathext/betainc.go
new file mode 100644
index 0000000000..9a0c61a9bb
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/betainc.go
@@ -0,0 +1,33 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mathext
+
+import "gonum.org/v1/gonum/mathext/internal/cephes"
+
+// RegIncBeta returns the value of the regularized incomplete beta function
+// I(x;a,b). It is defined as
+//
+//	I(x;a,b) = B(x;a,b) / B(a,b)
+//	         = Γ(a+b) / (Γ(a)*Γ(b)) * int_0^x u^(a-1) * (1-u)^(b-1) du.
+//
+// The domain of definition is 0 <= x <= 1, and the parameters a and b must be positive.
+// For other values of x, a, and b RegIncBeta will panic.
+func RegIncBeta(a, b float64, x float64) float64 {
+	return cephes.Incbet(a, b, x)
+}
+
+// InvRegIncBeta computes the inverse of the regularized incomplete beta function.
+// It returns the x for which
+//
+//	y = I(x;a,b)
+//
+// The domain of definition is 0 <= y <= 1, and the parameters a and b must be
+// positive. For other values of y, a, and b InvRegIncBeta will panic.
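+//
+// Since I(x;a,b) is the CDF of a Beta(a,b) random variable, quantiles follow
+// directly; for example, the 0.95 quantile of Beta(2, 3) is
+//
+//	x := InvRegIncBeta(2, 3, 0.95)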
+func InvRegIncBeta(a, b float64, y float64) float64 {
+	if y < 0 || 1 < y {
+		panic("mathext: parameter out of range")
+	}
+	return cephes.Incbi(a, b, y)
+}
diff --git a/vendor/gonum.org/v1/gonum/mathext/digamma.go b/vendor/gonum.org/v1/gonum/mathext/digamma.go
new file mode 100644
index 0000000000..67ebf007ff
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/digamma.go
@@ -0,0 +1,45 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mathext
+
+import (
+	"math"
+)
+
+// Digamma returns the logarithmic derivative of the gamma function at x.
+//
+//	ψ(x) = d/dx (Ln (Γ(x))).
+func Digamma(x float64) float64 {
+	// This is adapted from
+	// http://web.science.mq.edu.au/~mjohnson/code/digamma.c
+	var result float64
+	switch {
+	case math.IsNaN(x), math.IsInf(x, 1):
+		return x
+	case math.IsInf(x, -1):
+		return math.NaN()
+	case x == 0:
+		return math.Copysign(math.Inf(1), -x)
+	case x < 0:
+		if x == math.Floor(x) {
+			return math.NaN()
+		}
+		// Reflection formula, http://dlmf.nist.gov/5.5#E4
+		_, r := math.Modf(x)
+		result = -math.Pi / math.Tan(math.Pi*r)
+		x = 1 - x
+	}
+	for ; x < 7; x++ {
+		// Recurrence relation, http://dlmf.nist.gov/5.5#E2
+		result -= 1 / x
+	}
+	x -= 0.5
+	xx := 1 / x
+	xx2 := xx * xx
+	xx4 := xx2 * xx2
+	// Asymptotic expansion, http://dlmf.nist.gov/5.11#E2
+	result += math.Log(x) + (1.0/24.0)*xx2 - (7.0/960.0)*xx4 + (31.0/8064.0)*xx4*xx2 - (127.0/30720.0)*xx4*xx4
+	return result
+}
diff --git a/vendor/gonum.org/v1/gonum/mathext/doc.go b/vendor/gonum.org/v1/gonum/mathext/doc.go
new file mode 100644
index 0000000000..539622d4a8
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/doc.go
@@ -0,0 +1,7 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package mathext implements special math functions not implemented by the
+// Go standard library.
+package mathext // import "gonum.org/v1/gonum/mathext"
diff --git a/vendor/gonum.org/v1/gonum/mathext/ell_carlson.go b/vendor/gonum.org/v1/gonum/mathext/ell_carlson.go
new file mode 100644
index 0000000000..1334f6b94a
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/ell_carlson.go
@@ -0,0 +1,168 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mathext
+
+import (
+	"math"
+)
+
+// EllipticRF computes the symmetric elliptic integral R_F(x,y,z):
+//
+//	R_F(x,y,z) = (1/2)\int_{0}^{\infty}{1/s(t)} dt,
+//	s(t) = \sqrt{(t+x)(t+y)(t+z)}.
+//
+// The arguments x, y, z must satisfy the following conditions, otherwise the function returns math.NaN():
+//
+//	0 ≤ x,y,z ≤ upper,
+//	lower ≤ x+y,y+z,z+x,
+//
+// where:
+//
+//	lower = 5/(2^1022) = 1.112536929253601e-307,
+//	upper = (2^1022)/5 = 8.988465674311580e+306.
+//
+// The definition of the symmetric elliptic integral R_F can be found in NIST
+// Digital Library of Mathematical Functions (http://dlmf.nist.gov/19.16.E1).
+func EllipticRF(x, y, z float64) float64 {
+	// The original Fortran code was published as Algorithm 577 in ACM TOMS (http://doi.org/10.1145/355958.355970).
+	// This code is also available as a part of SLATEC Common Mathematical Library (http://netlib.org/slatec/index.html).
Later, Carlson described + // an improved version in http://dx.doi.org/10.1007/BF02198293 (also available at https://arxiv.org/abs/math/9409227). + const ( + lower = 5.0 / (1 << 256) / (1 << 256) / (1 << 256) / (1 << 254) // 5*2^-1022 + upper = 1 / lower + tol = 1.2674918778210762260320167734407048051023273568443e-02 // (3ε)^(1/8) + ) + if x < 0 || y < 0 || z < 0 || math.IsNaN(x) || math.IsNaN(y) || math.IsNaN(z) { + return math.NaN() + } + if upper < x || upper < y || upper < z { + return math.NaN() + } + if x+y < lower || y+z < lower || z+x < lower { + return math.NaN() + } + + A0 := (x + y + z) / 3 + An := A0 + Q := math.Max(math.Max(math.Abs(A0-x), math.Abs(A0-y)), math.Abs(A0-z)) / tol + xn, yn, zn := x, y, z + mul := 1.0 + + for Q >= mul*math.Abs(An) { + xnsqrt, ynsqrt, znsqrt := math.Sqrt(xn), math.Sqrt(yn), math.Sqrt(zn) + lambda := xnsqrt*ynsqrt + ynsqrt*znsqrt + znsqrt*xnsqrt + An = (An + lambda) * 0.25 + xn = (xn + lambda) * 0.25 + yn = (yn + lambda) * 0.25 + zn = (zn + lambda) * 0.25 + mul *= 4 + } + + X := (A0 - x) / (mul * An) + Y := (A0 - y) / (mul * An) + Z := -(X + Y) + E2 := X*Y - Z*Z + E3 := X * Y * Z + + // http://dlmf.nist.gov/19.36.E1 + return (1 - 1/10.0*E2 + 1/14.0*E3 + 1/24.0*E2*E2 - 3/44.0*E2*E3 - 5/208.0*E2*E2*E2 + 3/104.0*E3*E3 + 1/16.0*E2*E2*E3) / math.Sqrt(An) +} + +// EllipticRD computes the symmetric elliptic integral R_D(x,y,z): +// +// R_D(x,y,z) = (1/2)\int_{0}^{\infty}{1/(s(t)(t+z))} dt, +// s(t) = \sqrt{(t+x)(t+y)(t+z)}. +// +// The arguments x, y, z must satisfy the following conditions, otherwise the function returns math.NaN(): +// +// 0 ≤ x,y ≤ upper, +// lower ≤ z ≤ upper, +// lower ≤ x+y, +// +// where: +// +// lower = (5/(2^1022))^(1/3) = 4.809554074311679e-103, +// upper = ((2^1022)/5)^(1/3) = 2.079194837087086e+102. +// +// The definition of the symmetric elliptic integral R_D can be found in NIST +// Digital Library of Mathematical Functions (http://dlmf.nist.gov/19.16.E5). +func EllipticRD(x, y, z float64) float64 { + // The original Fortran code was published as Algorithm 577 in ACM TOMS (http://doi.org/10.1145/355958.355970). + // This code is also available as a part of SLATEC Common Mathematical Library (http://netlib.org/slatec/index.html). Later, Carlson described + // an improved version in http://dx.doi.org/10.1007/BF02198293 (also available at https://arxiv.org/abs/math/9409227). 
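+	// The computation follows Carlson's duplication algorithm: the arguments
+	// are repeatedly averaged through lambda while the partial sum s is
+	// accumulated, and the remainder is evaluated with the series expansion
+	// below (http://dlmf.nist.gov/19.36.E2).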
+ const ( + lower = 4.8095540743116787026618007863123676393525016818363e-103 // (5*2^-1022)^(1/3) + upper = 1 / lower + tol = 9.0351169339315770474760122547068324993857488849382e-03 // (ε/5)^(1/8) + ) + if x < 0 || y < 0 || math.IsNaN(x) || math.IsNaN(y) || math.IsNaN(z) { + return math.NaN() + } + if upper < x || upper < y || upper < z { + return math.NaN() + } + if x+y < lower || z < lower { + return math.NaN() + } + + A0 := (x + y + 3*z) / 5 + An := A0 + Q := math.Max(math.Max(math.Abs(A0-x), math.Abs(A0-y)), math.Abs(A0-z)) / tol + xn, yn, zn := x, y, z + mul, s := 1.0, 0.0 + + for Q >= mul*math.Abs(An) { + xnsqrt, ynsqrt, znsqrt := math.Sqrt(xn), math.Sqrt(yn), math.Sqrt(zn) + lambda := xnsqrt*ynsqrt + ynsqrt*znsqrt + znsqrt*xnsqrt + s += 1 / (mul * znsqrt * (zn + lambda)) + An = (An + lambda) * 0.25 + xn = (xn + lambda) * 0.25 + yn = (yn + lambda) * 0.25 + zn = (zn + lambda) * 0.25 + mul *= 4 + } + + X := (A0 - x) / (mul * An) + Y := (A0 - y) / (mul * An) + Z := -(X + Y) / 3 + E2 := X*Y - 6*Z*Z + E3 := (3*X*Y - 8*Z*Z) * Z + E4 := 3 * (X*Y - Z*Z) * Z * Z + E5 := X * Y * Z * Z * Z + + // http://dlmf.nist.gov/19.36.E2 + return (1-3/14.0*E2+1/6.0*E3+9/88.0*E2*E2-3/22.0*E4-9/52.0*E2*E3+3/26.0*E5-1/16.0*E2*E2*E2+3/40.0*E3*E3+3/20.0*E2*E4+45/272.0*E2*E2*E3-9/68.0*(E3*E4+E2*E5))/(mul*An*math.Sqrt(An)) + 3*s +} + +// EllipticF computes the Legendre's elliptic integral of the 1st kind F(phi,m), 0≤m<1: +// +// F(\phi,m) = \int_{0}^{\phi} 1 / \sqrt{1-m\sin^2(\theta)} d\theta +// +// Legendre's elliptic integrals can be expressed as symmetric elliptic integrals, in this case: +// +// F(\phi,m) = \sin\phi R_F(\cos^2\phi,1-m\sin^2\phi,1) +// +// The definition of F(phi,k) where k=sqrt(m) can be found in NIST Digital Library of Mathematical +// Functions (http://dlmf.nist.gov/19.2.E4). +func EllipticF(phi, m float64) float64 { + s, c := math.Sincos(phi) + return s * EllipticRF(c*c, 1-m*s*s, 1) +} + +// EllipticE computes the Legendre's elliptic integral of the 2nd kind E(phi,m), 0≤m<1: +// +// E(\phi,m) = \int_{0}^{\phi} \sqrt{1-m\sin^2(\theta)} d\theta +// +// Legendre's elliptic integrals can be expressed as symmetric elliptic integrals, in this case: +// +// E(\phi,m) = \sin\phi R_F(\cos^2\phi,1-m\sin^2\phi,1)-(m/3)\sin^3\phi R_D(\cos^2\phi,1-m\sin^2\phi,1) +// +// The definition of E(phi,k) where k=sqrt(m) can be found in NIST Digital Library of Mathematical +// Functions (http://dlmf.nist.gov/19.2.E5). +func EllipticE(phi, m float64) float64 { + s, c := math.Sincos(phi) + x, y := c*c, 1-m*s*s + return s * (EllipticRF(x, y, 1) - (m/3)*s*s*EllipticRD(x, y, 1)) +} diff --git a/vendor/gonum.org/v1/gonum/mathext/ell_complete.go b/vendor/gonum.org/v1/gonum/mathext/ell_complete.go new file mode 100644 index 0000000000..bdba081aad --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/ell_complete.go @@ -0,0 +1,355 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mathext + +import ( + "math" +) + +// CompleteK computes the complete elliptic integral of the 1st kind, 0≤m≤1. It returns math.NaN() if m is not in [0,1]. +// +// K(m) = \int_{0}^{π/2} 1/{\sqrt{1-m{\sin^2θ}}} dθ +func CompleteK(m float64) float64 { + // Reference: + // Toshio Fukushima, Precise and fast computation of complete elliptic integrals + // by piecewise minimax rational function approximation, + // Journal of Computational and Applied Mathematics, Volume 282, 2015, Pages 71-76. 
+ // https://doi.org/10.1016/j.cam.2014.12.038 + // Original Fortran code available at: + // https://www.researchgate.net/publication/295857819_xceitxt_F90_package_of_complete_elliptic_integral_computation + if m < 0 || 1 < m || math.IsNaN(m) { + return math.NaN() + } + + mc := 1 - m + + if mc > 0.592990 { + t := 2.45694208987494165*mc - 1.45694208987494165 + t2 := t * t + p := ((3703.75266375099019 + t2*(2744.82029097576810+t2*36.2381612593459565)) + t*(5462.47093231923466+t2*(543.839017382099411+t2*0.393188651542789784))) + q := ((2077.94377067058435 + t2*(1959.05960044399275+t2*43.5464368440078942)) + t*(3398.00069767755460+t2*(472.794455487539279+t2))) + return p / q + } + if mc > 0.350756 { + t := 4.12823963605439369*mc - 1.44800482178389491 + t2 := t * t + p := ((4264.28203103974630 + t2*(3214.59187442783167+t2*43.2589626155454993)) + t*(6341.90978213264024+t2*(642.790566685354573+t2*0.475223892294445943))) + q := ((2125.06914237062279 + t2*(2006.03187933518870+t2*44.1848041560412224)) + t*(3479.95663350926514+t2*(482.900172581418890+t2))) + return p / q + } + if mc > 0.206924 { + t := 6.95255575949719117*mc - 1.43865064797819679 + t2 := t * t + p := ((4870.25402224986382 + t2*(3738.29369283392307+t2*51.3609902253065926)) + t*(7307.18826377416591+t2*(754.928587580583704+t2*0.571948962277566451))) + q := ((2172.51745704102287 + t2*(2056.13612019430497+t2*44.9026847057686146)) + t*(3565.04737778032566+t2*(493.962405117599400+t2))) + return p / q + } + if mc > 0.121734 { + t := 11.7384669562155183*mc - 1.42897053644793990 + t2 := t * t + p := ((5514.8512729127464 + t2*(4313.60788246750934+t2*60.598720224393536)) + t*(8350.4595896779631+t2*(880.27903031894216+t2*0.68504458747933773))) + q := ((2218.41682813309737 + t2*(2107.97379949034285+t2*45.6911096775045314)) + t*(3650.41829123846319+t2*(505.74295207655096+t2))) + return p / q + } + if mc > 0.071412 { + t := 19.8720241643813839*mc - 1.41910098962680339 + t2 := t * t + p := ((6188.8743957372448 + t2*(4935.41351498551527+t2*70.981049144472361)) + t*(9459.3331440432847+t2*(1018.21910476032105+t2*0.81599895108245948))) + q := ((2260.73112539748448 + t2*(2159.68721749761492+t2*46.5298955058476510)) + t*(3732.66955095581621+t2*(517.86964191812384+t2))) + return p / q + } + if mc > 0.041770 { + t := 33.7359152553808785*mc - 1.40914918021725929 + t2 := t * t + p := ((6879.5170681289562 + t2*(5594.8381504799829+t2*82.452856129147838)) + t*(10615.0836403687221+t2*(1167.26108955935542+t2*0.96592719058503951))) + q := ((2296.88303450660439 + t2*(2208.74949754945558+t2*47.3844470709989137)) + t*(3807.37745652028212+t2*(529.79651353072921+t2))) + return p / q + } + if mc > 0.024360 { + t := 57.4382538770821367*mc - 1.39919586444572085 + t2 := t * t + p := ((7570.6827538712100 + t2*(6279.2661370014890+t2*94.886883830605940)) + t*(11792.9392624454532+t2*(1325.01058966228180+t2*1.13537029594409690))) + q := ((2324.04824540459984 + t2*(2252.22250562615338+t2*48.2089280211559345)) + t*(3869.56755306385732+t2*(540.85752251676412+t2))) + return p / q + } + if mc > 0.014165 { + t := 98.0872976949485042*mc - 1.38940657184894556 + t2 := t * t + p := ((8247.2601660137746 + t2*(6974.7495213178613+t2*108.098282908839979)) + t*(12967.7060124572914+t2*(1488.54008220335966+t2*1.32411616748380686))) + q := ((2340.47337508405427 + t2*(2287.70677154700516+t2*48.9575432570382154)) + t*(3915.63324533769906+t2*(550.45072377717361+t2))) + return p / q + } + if mc > 0.008213 { + t := 168.010752688172043*mc - 1.37987231182795699 + t2 := t * t + p := ((8894.2961573611293 + 
t2*(7666.5611739483371+t2*121.863474964652041)) + t*(14113.7038749808951+t2*(1654.60731579994159+t2*1.53112170837206117))) + q := ((2344.88618943372377 + t2*(2313.28396270968662+t2*49.5906602613891184)) + t*(3942.81065054556536+t2*(558.07615380622169+t2))) + return p / q + } + if mc > 0 { + t := 1.0 - 121.758188238159016*mc + p := -math.Log(mc*0.0625) * (34813.4518336350547 + t*(235.767716637974271+t*0.199792723884069485)) / (69483.5736412906324 + t*(614.265044703187382+t)) + q := -mc * (9382.53386835986099 + t*(51.6478985993381223+t*0.00410754154682816898)) / (37327.7262507318317 + t*(408.017247271148538+t)) + return p + q + } + + return math.Inf(1) +} + +// CompleteE computes the complete elliptic integral of the 2nd kind, 0≤m≤1. It returns math.NaN() if m is not in [0,1]. +// +// E(m) = \int_{0}^{π/2} {\sqrt{1-m{\sin^2θ}}} dθ +func CompleteE(m float64) float64 { + // Reference: + // Toshio Fukushima, Precise and fast computation of complete elliptic integrals + // by piecewise minimax rational function approximation, + // Journal of Computational and Applied Mathematics, Volume 282, 2015, Pages 71-76. + // https://doi.org/10.1016/j.cam.2014.12.038 + // Original Fortran code available at: + // https://www.researchgate.net/publication/295857819_xceitxt_F90_package_of_complete_elliptic_integral_computation + if m < 0 || 1 < m || math.IsNaN(m) { + return math.NaN() + } + + mc := 1 - m + + if mc > 0.566638 { + t := 2.30753965506897236*mc - 1.30753965506897236 + t2 := t * t + p := ((19702.2363352671642 + t2*(18177.1879313824040+t2*409.975559128654710)) + t*(31904.1559574281609+t2*(4362.94760768571862+t2*10.3244775335024885))) + q := ((14241.2135819448616 + t2*(10266.4884503526076+t2*117.162100771599098)) + t*(20909.9899599927367+t2*(1934.86289070792954+t2))) + return p / q + } + if mc > 0.315153 { + t := 3.97638030101198879*mc - 1.25316818100483130 + t2 := t * t + p := ((16317.0721393008221 + t2*(15129.4009798463159+t2*326.113727011739428)) + t*(26627.8852140835023+t2*(3574.15857605556033+t2*7.93163724081373477))) + q := ((13047.1505096551210 + t2*(9964.25173735060361+t2*117.670514069579649)) + t*(19753.5762165922376+t2*(1918.72232033637537+t2))) + return p / q + } + if mc > 0.171355 { + t := 6.95419964116329852*mc - 1.19163687951153702 + t2 := t * t + p := ((13577.3850240991520 + t2*(12871.9137872656293+t2*263.964361648520708)) + t*(22545.4744699553993+t2*(3000.74575264868572+t2*6.08522443139677663))) + q := ((11717.3306408059832 + t2*(9619.40382323874064+t2*118.690522739531267)) + t*(18431.1264424290258+t2*(1904.06010727307491+t2))) + return p / q + } + if mc > 0.090670 { + t := 12.3938774245522712*mc - 1.12375286608415443 + t2 := t * t + p := ((11307.9485341543712 + t2*(11208.6068472959372+t2*219.253495956962613)) + t*(19328.6173704569489+t2*(2596.54874477084334+t2*4.66931143174036616))) + q := ((10307.6837501971393 + t2*(9241.7604666150102+t2*120.498555754227847)) + t*(16982.2450249024383+t2*(1893.41905403040679+t2))) + return p / q + } + if mc > 0.046453 { + t := 22.6157360291290680*mc - 1.05056878576113260 + t2 := t * t + p := ((9383.1490856819874 + t2*(9977.2498973537718+t2*188.618148076418837)) + t*(16718.9730458676860+t2*(2323.49987246555537+t2*3.59313532204509922))) + q := ((8877.1964704758383 + t2*(8840.2771293410661+t2*123.422125687316355)) + t*(15450.0537230364062+t2*(1889.13672102820913+t2))) + return p / q + } + if mc > 0.022912 { + t := 42.4790790535661187*mc - 0.973280659275306911 + t2 := t * t + p := ((7719.1171817802054 + t2*(9045.3996063894006+t2*169.386557799782496)) + 
t*(14521.7363804934985+t2*(2149.92068078627829+t2*2.78515570453129137))) + q := ((7479.7539074698012 + t2*(8420.3848818926324+t2*127.802109608726363)) + t*(13874.4978011497847+t2*(1892.69753150329759+t2))) + return p / q + } + if mc > 0.010809 { + t := 82.6241427745187144*mc - 0.893084359249772784 + t2 := t * t + p := ((6261.6095608987273 + t2*(8304.3265605809870+t2*159.371262600702237)) + t*(12593.0874916293982+t2*(2048.68391263416822+t2*2.18867046462858104))) + q := ((6156.4532048239501 + t2*(7979.7435857665227+t2*133.911640385965187)) + t*(12283.8373999680518+t2*(1903.60556312663537+t2))) + return p / q + } + if mc > 0.004841 { + t := 167.560321715817694*mc - 0.811159517426273458 + t2 := t * t + p := ((4978.06146583586728 + t2*(7664.6703673290453+t2*156.689647694892782)) + t*(10831.7178150656694+t2*(1995.66437151562090+t2*1.75859085945198570))) + q := ((4935.56743322938333 + t2*(7506.8028283118051+t2*141.854303920116856)) + t*(10694.5510113880077+t2*(1918.38517009740321+t2))) + return p / q + } + if mc > 0 { + t := 1.0 - 206.568890725056806*mc + p := -mc * math.Log(mc*0.0625) * (41566.6612602868736 + t*(154.034981522913482+t*0.0618072471798575991)) / (165964.442527585615 + t*(917.589668642251803+t)) + q := (132232.803956682877 + t*(353.375480007017643-t*1.40105837312528026)) / (132393.665743088043 + t*(192.112635228732532-t)) + return p + q + } + + return 1 +} + +// CompleteB computes an associate complete elliptic integral of the 2nd kind, 0≤m≤1. It returns math.NaN() if m is not in [0,1]. +// +// B(m) = \int_{0}^{π/2} {\cos^2θ} / {\sqrt{1-m{\sin^2θ}}} dθ +func CompleteB(m float64) float64 { + // Reference: + // Toshio Fukushima, Precise and fast computation of complete elliptic integrals + // by piecewise minimax rational function approximation, + // Journal of Computational and Applied Mathematics, Volume 282, 2015, Pages 71-76. 
+ // https://doi.org/10.1016/j.cam.2014.12.038 + // Original Fortran code available at: + // https://www.researchgate.net/publication/295857819_xceitxt_F90_package_of_complete_elliptic_integral_computation + if m < 0 || 1 < m || math.IsNaN(m) { + return math.NaN() + } + + mc := 1 - m + + if mc > 0.555073 { + t := 2.24755971204264969*mc - 1.24755971204264969 + t2 := t * t + p := ((2030.25011505956379 + t2*(1727.60635612511943+t2*25.0715510300422010)) + t*(3223.16236100954529+t2*(361.164121995173076+t2*0.280355207707726826))) + q := ((2420.64907902774675 + t2*(2327.48464880306840+t2*47.9870997057202318)) + t*(4034.28168313496638+t2*(549.234220839203960+t2))) + return p / q + } + if mc > 0.302367 { + t := 3.95716761770595079*mc - 1.19651690106289522 + t2 := t * t + p := ((2209.26925068374373 + t2*(1981.37862223307242+t2*29.7612810087709299)) + t*(3606.58475322372526+t2*(422.693774742063054+t2*0.334623999861181980))) + q := ((2499.57898767250755 + t2*(2467.63998386656941+t2*50.0198090806651216)) + t*(4236.30953048456334+t2*(581.879599221457589+t2))) + return p / q + } + if mc > 0.161052 { + t := 7.07638962601280827*mc - 1.13966670204861480 + t2 := t * t + p := ((2359.14823394150129 + t2*(2254.30785457761760+t2*35.2259786264917876)) + t*(3983.28520266051676+t2*(492.601686517364701+t2*0.396605124984359783))) + q := ((2563.95563932625156 + t2*(2633.23323959119935+t2*52.6711647124832948)) + t*(4450.19076667898892+t2*(622.983787815718489+t2))) + return p / q + } + if mc > 0.083522 { + t := 12.8982329420869341*mc - 1.07728621178898491 + t2 := t * t + p := ((2464.65334987833736 + t2*(2541.68516994216007+t2*41.5832527504007778)) + t*(4333.38639187691528+t2*(571.53606797524881+t2*0.465975784547025267))) + q := ((2600.66956117247726 + t2*(2823.69445052534842+t2*56.136001230010910)) + t*(4661.64381841490914+t2*(674.25435972414302+t2))) + return p / q + } + if mc > 0.041966 { + t := 24.0639137549331023*mc - 1.00986620463952257 + t2 := t * t + p := ((2509.86724450741259 + t2*(2835.27071287535469+t2*48.9701196718008345)) + t*(4631.12336462339975+t2*(659.86172161727281+t2*0.54158304771955794))) + q := ((2594.15983397593723 + t2*(3034.20118545214106+t2*60.652838995496991)) + t*(4848.17491604384532+t2*(737.15143838356850+t2))) + return p / q + } + if mc > 0.020313 { + t := 46.1829769546944996*mc - 0.938114810880709371 + t2 := t * t + p := ((2480.58307884128017 + t2*(3122.00900554841322+t2*57.541132641218839)) + t*(4845.57861173250699+t2*(757.31633816400643+t2*0.62119950515996627))) + q := ((2528.85218300581396 + t2*(3253.86151324157460+t2*66.496093157522450)) + t*(4979.31783250484768+t2*(812.40556572486862+t2))) + return p / q + } + if mc > 0.009408 { + t := 91.7010545621274645*mc - 0.862723521320495186 + t2 := t * t + p := ((2365.25385348859592 + t2*(3381.09304915246175+t2*67.442026950538221)) + t*(4939.53925884558687+t2*(862.16657576129841+t2*0.70143698925710129))) + q := ((2390.48737882063755 + t2*(3462.34808443022907+t2*73.934680452209164)) + t*(5015.4675579215077+t2*(898.99542983710459+t2))) + return p / q + } + if mc > 0.004136 { + t := 189.681335356600910*mc - 0.784522003034901366 + t2 := t * t + p := ((2160.82916040868119 + t2*(3584.53058926175721+t2*78.769178005879162)) + t*(4877.14832623847052+t2*(970.53716686804832+t2*0.77797110431753920))) + q := ((2172.70451405048305 + t2*(3630.52345460629336+t2*83.173163222639080)) + t*(4916.35263668839769+t2*(993.36676027886685+t2))) + return p / q + } + if mc > 0 { + t := 1 - 106.292517006802721*mc + p := mc * math.Log(mc*0.0625) * (6607.46457640413908 + 
t*(19.0287633783211078-t*0.00625368946932704460)) / (26150.3443630974309 + t*(354.603981274536040+t)) + q := (26251.5678902584870 + t*(168.788023807915689+t*0.352150236262724288)) / (26065.7912239203873 + t*(353.916840382280456+t)) + return p + q + } + + return 1 +} + +// CompleteD computes an associate complete elliptic integral of the 2nd kind, 0≤m≤1. It returns math.NaN() if m is not in [0,1]. +// +// D(m) = \int_{0}^{π/2} {\sin^2θ} / {\sqrt{1-m{\sin^2θ}}} dθ +func CompleteD(m float64) float64 { + // Reference: + // Toshio Fukushima, Precise and fast computation of complete elliptic integrals + // by piecewise minimax rational function approximation, + // Journal of Computational and Applied Mathematics, Volume 282, 2015, Pages 71-76. + // https://doi.org/10.1016/j.cam.2014.12.038 + // Original Fortran code available at: + // https://www.researchgate.net/publication/295857819_xceitxt_F90_package_of_complete_elliptic_integral_computation + if m < 0 || 1 < m || math.IsNaN(m) { + return math.NaN() + } + + mc := 1 - m + + if mc > 0.599909 { + t := 2.49943137936119533*mc - 1.49943137936119533 + t2 := t * t + p := ((1593.39813781813498 + t2*(1058.56241259843217+t2*11.7584241242587571)) + t*(2233.25576544961714+t2*(195.247394601357872+t2*0.101486443490307517))) + q := ((1685.47865546030468 + t2*(1604.88100543517015+t2*38.6743012128666717)) + t*(2756.20968383181114+t2*(397.504162950935944+t2))) + return p / q + } + if mc > 0.359180 { + t := 4.15404874360795750*mc - 1.49205122772910617 + t2 := t * t + p := ((1967.01442513777287 + t2*(1329.30058268219177+t2*15.0447805948342760)) + t*(2779.87604145516343+t2*(247.475085945854673+t2*0.130547566005491628))) + q := ((1749.70634057327467 + t2*(1654.40804288486242+t2*39.1895256017535337)) + t*(2853.92630369567765+t2*(406.925098588378587+t2))) + return p / q + } + if mc > 0.214574 { + t := 6.91534237860116454*mc - 1.48385267554596628 + t2 := t * t + p := ((2409.64196912091452 + t2*(1659.30176823041376+t2*19.1942111405094383)) + t*(3436.40744503228691+t2*(312.186468430688790+t2*0.167847673021897479))) + q := ((1824.89205701262525 + t2*(1715.38574780156913+t2*39.8798253173462218)) + t*(2971.02216287936566+t2*(418.929791715319490+t2))) + return p / q + } + if mc > 0.127875 { + t := 11.5341584101316047*mc - 1.47493050669557896 + t2 := t * t + p := ((2926.81143179637839 + t2*(2056.45624281065334+t2*24.3811986813439843)) + t*(4214.52119721241319+t2*(391.420514384925370+t2*0.215574280659075512))) + q := ((1910.33091918583314 + t2*(1787.99942542734799+t2*40.7663012893484449)) + t*(3107.04531802441481+t2*(433.673494280825971+t2))) + return p / q + } + if mc > 0.076007 { + t := 19.2797100331611013*mc - 1.46539292049047582 + t2 := t * t + p := ((3520.63614251102960 + t2*(2526.67111759550923+t2*30.7739877519417978)) + t*(5121.2842239226937+t2*(486.926821696342529+t2*0.276315678908126399))) + q := ((2003.81997889501324 + t2*(1871.05914195570669+t2*41.8489850490387023)) + t*(3259.09205279874214+t2*(451.007555352632053+t2))) + return p / q + } + if mc > 0.045052 { + t := 32.3049588111775157*mc - 1.45540300436116944 + t2 := t * t + p := ((4188.00087087025347 + t2*(3072.05695847158556+t2*38.5070211470790031)) + t*(6156.0080960857764+t2*(599.76666155374012+t2*0.352955925261363680))) + q := ((2101.60113938424690 + t2*(1961.76794074710108+t2*43.0997999502743622)) + t*(3421.55151253792527+t2*(470.407158843118117+t2))) + return p / q + } + if mc > 0.026626 { + t := 54.2711386084880061*mc - 1.44502333658960165 + t2 := t * t + p := ((4916.74442376570733 + 
t2*(3688.12811638360551+t2*47.6447145147811350)) + t*(7304.6632479558695+t2*(729.75841970840314+t2*0.448422756936257635))) + q := ((2197.49982676612397 + t2*(2055.19657857622715+t2*44.4576261146308645)) + t*(3584.94502590860852+t2*(490.880160668822953+t2))) + return p / q + } + if mc > 0.015689 { + t := 91.4327512114839536*mc - 1.43448843375697175 + t2 := t * t + p := ((5688.7542903989517 + t2*(4364.21513060078954+t2*58.159468141567195)) + t*(8542.6096475195826+t2*(875.35992968472914+t2*0.56528145509695951))) + q := ((2285.44062680812883 + t2*(2145.80779422696555+t2*45.8427480379028781)) + t*(3739.30422133833258+t2*(511.23253971875808+t2))) + return p / q + } + if mc > 0.009216 { + t := 154.487872701992894*mc - 1.42376023482156651 + t2 := t * t + p := ((6475.3392225234969 + t2*(5081.2997108708577+t2*69.910123337464043)) + t*(9829.1138694605662+t2*(1033.32687775311981+t2*0.70526087421186325))) + q := ((2357.74885505777295 + t2*(2226.89527217032394+t2*47.1609071069631012)) + t*(3872.32565152553360+t2*(530.03943432061149+t2))) + return p / q + } + if mc > 0 { + t := 1 - 108.506944444444444*mc + p := -math.Log(mc*0.0625) * (6.2904323649908115e6 + t*(58565.284164780476+t*(131.176674599188545+t*0.0426826410911220304))) / (1.24937550257219890e7 + t*(203580.534005225410+t*(921.17729845011868+t))) + q := -(27356.1090344387530 + t*(107.767403612304371-t*0.0827769227048233593)) / (27104.0854889805978 + t*(358.708172147752755+t)) + return p + q + } + + return math.Inf(1) +} diff --git a/vendor/gonum.org/v1/gonum/mathext/erf.go b/vendor/gonum.org/v1/gonum/mathext/erf.go new file mode 100644 index 0000000000..793238b03a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/erf.go @@ -0,0 +1,91 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mathext + +import "math" + +/* +Copyright (c) 2012 The Probab Authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. +* Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +// NormalQuantile computes the quantile function (inverse CDF) of the standard +// normal. 
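+// For reference, NormalQuantile(0.975) ≈ 1.96, the usual two-sided 95%
+// critical value.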
NormalQuantile panics if the input p is less than 0 or greater than 1. +func NormalQuantile(p float64) float64 { + switch { + case p < 0 || 1 < p: + panic("mathext: quantile out of bounds") + case p == 1: + return math.Inf(1) + case p == 0: + return math.Inf(-1) + } + // Compute rational approximation based on the value of p. + + dp := p - 0.5 + if math.Abs(dp) <= 0.425 { + z := 0.180625 - dp*dp + z1 := ((((((zQSA[0]*z+zQSA[1])*z+zQSA[2])*z+zQSA[3])*z+zQSA[4])*z+zQSA[5])*z+zQSA[6])*z + zQSA[7] + z2 := ((((((zQSB[0]*z+zQSB[1])*z+zQSB[2])*z+zQSB[3])*z+zQSB[4])*z+zQSB[5])*z+zQSB[6])*z + zQSB[7] + return dp * z1 / z2 + } + + if p < 0.5 { + r := math.Sqrt(-math.Log(p)) + if r <= 5.0 { + z := r - 1.6 + z1 := ((((((zQIA[0]*z+zQIA[1])*z+zQIA[2])*z+zQIA[3])*z+zQIA[4])*z+zQIA[5])*z+zQIA[6])*z + zQIA[7] + z2 := ((((((zQIB[0]*z+zQIB[1])*z+zQIB[2])*z+zQIB[3])*z+zQIB[4])*z+zQIB[5])*z+zQIB[6])*z + zQIB[7] + return -z1 / z2 + } + z := r - 5 + z1 := ((((((zQTA[0]*z+zQTA[1])*z+zQTA[2])*z+zQTA[3])*z+zQTA[4])*z+zQTA[5])*z+zQTA[6])*z + zQTA[7] + z2 := ((((((zQTB[0]*z+zQTB[1])*z+zQTB[2])*z+zQTB[3])*z+zQTB[4])*z+zQTB[5])*z+zQTB[6])*z + zQTB[7] + return -z1 / z2 + } + r := math.Sqrt(-math.Log(1 - p)) + if r <= 5.0 { + z := r - 1.6 + z1 := ((((((zQIA[0]*z+zQIA[1])*z+zQIA[2])*z+zQIA[3])*z+zQIA[4])*z+zQIA[5])*z+zQIA[6])*z + zQIA[7] + z2 := ((((((zQIB[0]*z+zQIB[1])*z+zQIB[2])*z+zQIB[3])*z+zQIB[4])*z+zQIB[5])*z+zQIB[6])*z + zQIB[7] + return z1 / z2 + } + + z := r - 5 + z1 := ((((((zQTA[0]*z+zQTA[1])*z+zQTA[2])*z+zQTA[3])*z+zQTA[4])*z+zQTA[5])*z+zQTA[6])*z + zQTA[7] + z2 := ((((((zQTB[0]*z+zQTB[1])*z+zQTB[2])*z+zQTB[3])*z+zQTB[4])*z+zQTB[5])*z+zQTB[6])*z + zQTB[7] + return z1 / z2 +} + +var ( + zQSA = [...]float64{2509.0809287301226727, 33430.575583588128105, 67265.770927008700853, 45921.953931549871457, 13731.693765509461125, 1971.5909503065514427, 133.14166789178437745, 3.387132872796366608} + zQSB = [...]float64{5226.495278852854561, 28729.085735721942674, 39307.89580009271061, 21213.794301586595867, 5394.1960214247511077, 687.1870074920579083, 42.313330701600911252, 1.0} + zQIA = [...]float64{7.7454501427834140764e-4, 0.0227238449892691845833, 0.24178072517745061177, 1.27045825245236838258, 3.64784832476320460504, 5.7694972214606914055, 4.6303378461565452959, 1.42343711074968357734} + zQIB = [...]float64{1.05075007164441684324e-9, 5.475938084995344946e-4, 0.0151986665636164571966, 0.14810397642748007459, 0.68976733498510000455, 1.6763848301838038494, 2.05319162663775882187, 1.0} + zQTA = [...]float64{2.01033439929228813265e-7, 2.71155556874348757815e-5, 0.0012426609473880784386, 0.026532189526576123093, 0.29656057182850489123, 1.7848265399172913358, 5.4637849111641143699, 6.6579046435011037772} + zQTB = [...]float64{2.04426310338993978564e-15, 1.4215117583164458887e-7, 1.8463183175100546818e-5, 7.868691311456132591e-4, 0.0148753612908506148525, 0.13692988092273580531, 0.59983220655588793769, 1.0} +) diff --git a/vendor/gonum.org/v1/gonum/mathext/gamma_inc.go b/vendor/gonum.org/v1/gonum/mathext/gamma_inc.go new file mode 100644 index 0000000000..c4abe2c2d1 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/gamma_inc.go @@ -0,0 +1,58 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mathext + +import ( + "gonum.org/v1/gonum/mathext/internal/cephes" +) + +// GammaIncReg computes the regularized incomplete Gamma integral. 
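+// For a = 1 it reduces to the exponential CDF: GammaIncReg(1, x) = 1 - exp(-x).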
+// +// GammaIncReg(a,x) = (1/ Γ(a)) \int_0^x e^{-t} t^{a-1} dt +// +// The input argument a must be positive and x must be non-negative or GammaIncReg +// will panic. +// +// See http://mathworld.wolfram.com/IncompleteGammaFunction.html +// or https://en.wikipedia.org/wiki/Incomplete_gamma_function for more detailed +// information. +func GammaIncReg(a, x float64) float64 { + return cephes.Igam(a, x) +} + +// GammaIncRegComp computes the complemented regularized incomplete Gamma integral. +// +// GammaIncRegComp(a,x) = 1 - GammaIncReg(a,x) +// = (1/ Γ(a)) \int_x^\infty e^{-t} t^{a-1} dt +// +// The input argument a must be positive and x must be non-negative or +// GammaIncRegComp will panic. +func GammaIncRegComp(a, x float64) float64 { + return cephes.IgamC(a, x) +} + +// GammaIncRegInv computes the inverse of the regularized incomplete Gamma integral. That is, +// it returns the x such that: +// +// GammaIncReg(a, x) = y +// +// The input argument a must be positive and y must be between 0 and 1 +// inclusive or GammaIncRegInv will panic. GammaIncRegInv should return a positive +// number, but can return NaN if there is a failure to converge. +func GammaIncRegInv(a, y float64) float64 { + return gammaIncRegInv(a, y) +} + +// GammaIncRegCompInv computes the inverse of the complemented regularized incomplete Gamma +// integral. That is, it returns the x such that: +// +// GammaIncRegComp(a, x) = y +// +// The input argument a must be positive and y must be between 0 and 1 +// inclusive or GammaIncRegCompInv will panic. GammaIncRegCompInv should return a +// positive number, but can return 0 even with non-zero y due to underflow. +func GammaIncRegCompInv(a, y float64) float64 { + return cephes.IgamI(a, y) +} diff --git a/vendor/gonum.org/v1/gonum/mathext/gamma_inc_inv.go b/vendor/gonum.org/v1/gonum/mathext/gamma_inc_inv.go new file mode 100644 index 0000000000..175cb6bc93 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/gamma_inc_inv.go @@ -0,0 +1,58 @@ +// Derived from SciPy's special/c_misc/gammaincinv.c +// https://github.com/scipy/scipy/blob/master/scipy/special/c_misc/gammaincinv.c + +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mathext + +import ( + "math" + + "gonum.org/v1/gonum/mathext/internal/cephes" +) + +const ( + allowedATol = 1e-306 + allowedRTol = 1e-6 +) + +func gammaIncReg(x float64, params []float64) float64 { + return cephes.Igam(params[0], x) - params[1] +} + +// gammaIncRegInv is the inverse of the regularized incomplete Gamma integral. That is, it +// returns x such that: +// +// Igam(a, x) = y +// +// The input argument a must be positive and y must be between 0 and 1 +// inclusive or gammaIncRegInv will panic. gammaIncRegInv should return a +// positive number, but can return NaN if there is a failure to converge. +func gammaIncRegInv(a, y float64) float64 { + // For y not small, we just use + // IgamI(a, 1-y) + // (inverse of the complemented incomplete Gamma integral). For y small, + // however, 1-y is about 1, and we lose digits. + if a <= 0 || y <= 0 || y >= 0.25 { + return cephes.IgamI(a, 1-y) + } + + lo := 0.0 + flo := -y + hi := cephes.IgamI(a, 0.75) + fhi := 0.25 - y + + params := []float64{a, y} + + // Also, after we generate a small interval by bisection above, false + // position will do a large step from an interval of width ~1e-4 to ~1e-14 + // in one step (a=10, x=0.05, but similar for other values). 
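+	// falsePosition searches [lo, hi] for a root of Igam(a, x)-y; if it stops
+	// on the iteration limit with an error estimate still above the allowed
+	// tolerance, the result is discarded as NaN below.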
+	result, bestX, _, errEst := falsePosition(lo, hi, flo, fhi, 2*machEp, 2*machEp, 1e-2*a, gammaIncReg, params)
+	if result == fSolveMaxIterations && errEst > allowedATol+allowedRTol*math.Abs(bestX) {
+		bestX = math.NaN()
+	}
+
+	return bestX
+}
diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/amos/amos.go b/vendor/gonum.org/v1/gonum/mathext/internal/amos/amos.go
new file mode 100644
index 0000000000..6ea2d4668c
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/internal/amos/amos.go
@@ -0,0 +1,2136 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package amos
+
+import (
+	"math"
+	"math/cmplx"
+)
+
+/*
+The AMOS functions are included in SLATEC, and the SLATEC guide (http://www.netlib.org/slatec/guide) explicitly states:
+"The Library is in the public domain and distributed by the Energy
+Science and Technology Software Center."
+Mention of AMOS's inclusion in SLATEC goes back at least to this 1985 technical report from Sandia National Labs: http://infoserve.sandia.gov/sand_doc/1985/851018.pdf
+*/
+
+// math.NaN() are for padding to keep indexing easy.
+var imach = []int{-0, 5, 6, 0, 0, 32, 4, 2, 31, 2147483647, 2, 24, -125, 127, 53, -1021, 1023}
+
+// dmach[2] is the largest finite float64 (D1MACH(2)); note the positive exponent.
+var dmach = []float64{math.NaN(), 2.23e-308, 1.79e+308, 1.11e-16, 2.22e-16, 0.30103000998497009}
+
+func abs(a int) int {
+	if a >= 0 {
+		return a
+	}
+	return -a
+}
+
+func Zairy(ZR, ZI float64, ID, KODE int) (AIR, AII float64, NZ, IERR int) {
+	// zairy is adapted from the original Netlib code by Donald Amos.
+	// http://www.netlib.no/netlib/amos/zairy.f
+
+	// Original comment:
+	/*
+		C***BEGIN PROLOGUE ZAIRY
+		C***DATE WRITTEN 830501 (YYMMDD)
+		C***REVISION DATE 890801 (YYMMDD)
+		C***CATEGORY NO. B5K
+		C***KEYWORDS AIRY FUNCTION,BESSEL FUNCTIONS OF ORDER ONE THIRD
+		C***AUTHOR AMOS, DONALD E., SANDIA NATIONAL LABORATORIES
+		C***PURPOSE TO COMPUTE AIRY FUNCTIONS AI(Z) AND DAI(Z) FOR COMPLEX Z
+		C***DESCRIPTION
+		C
+		C ***A DOUBLE PRECISION ROUTINE***
+		C ON KODE=1, ZAIRY COMPUTES THE COMPLEX AIRY FUNCTION AI(Z) OR
+		C ITS DERIVATIVE DAI(Z)/DZ ON ID=0 OR ID=1 RESPECTIVELY. ON
+		C KODE=2, A SCALING OPTION CEXP(ZTA)*AI(Z) OR CEXP(ZTA)*
+		C DAI(Z)/DZ IS PROVIDED TO REMOVE THE EXPONENTIAL DECAY IN
+		C -PI/31.0 FROM THE K BESSEL
+		C FUNCTIONS BY
+		C
+		C AI(Z)=C*SQRT(Z)*K(1/3,ZTA) , DAI(Z)=-C*Z*K(2/3,ZTA)
+		C C=1.0/(PI*SQRT(3.0))
+		C ZTA=(2/3)*Z**(3/2)
+		C
+		C WITH THE POWER SERIES FOR CABS(Z)<=1.0.
+		C
+		C IN MOST COMPLEX VARIABLE COMPUTATION, ONE MUST EVALUATE ELE-
+		C MENTARY FUNCTIONS. WHEN THE MAGNITUDE OF Z IS LARGE, LOSSES
+		C OF SIGNIFICANCE BY ARGUMENT REDUCTION OCCUR. CONSEQUENTLY, IF
+		C THE MAGNITUDE OF ZETA=(2/3)*Z**1.5 EXCEEDS U1=SQRT(0.5/UR),
+		C THEN LOSSES EXCEEDING HALF PRECISION ARE LIKELY AND AN ERROR
+		C FLAG IERR=3 IS TRIGGERED WHERE UR=math.Max(dmach[4),1.0D-18) IS
+		C DOUBLE PRECISION UNIT ROUNDOFF LIMITED TO 18 DIGITS PRECISION.
+		C ALSO, if THE MAGNITUDE OF ZETA IS LARGER THAN U2=0.5/UR, THEN
+		C ALL SIGNIFICANCE IS LOST AND IERR=4. IN ORDER TO USE THE INT
+		C FUNCTION, ZETA MUST BE FURTHER RESTRICTED NOT TO EXCEED THE
+		C LARGEST INTEGER, U3=I1MACH(9). THUS, THE MAGNITUDE OF ZETA
+		C MUST BE RESTRICTED BY MIN(U2,U3). ON 32 BIT MACHINES, U1,U2,
+		C AND U3 ARE APPROXIMATELY 2.0E+3, 4.2E+6, 2.1E+9 IN SINGLE
+		C PRECISION ARITHMETIC AND 1.3E+8, 1.8E+16, 2.1E+9 IN DOUBLE
+		C PRECISION ARITHMETIC RESPECTIVELY. THIS MAKES U2 AND U3 LIMIT-
+		C ING IN THEIR RESPECTIVE ARITHMETICS.
THIS MEANS THAT THE MAG- + C NITUDE OF Z CANNOT EXCEED 3.1E+4 IN SINGLE AND 2.1E+6 IN + C DOUBLE PRECISION ARITHMETIC. THIS ALSO MEANS THAT ONE CAN + C EXPECT TO RETAIN, IN THE WORST CASES ON 32 BIT MACHINES, + C NO DIGITS IN SINGLE PRECISION AND ONLY 7 DIGITS IN DOUBLE + C PRECISION ARITHMETIC. SIMILAR CONSIDERATIONS HOLD FOR OTHER + C MACHINES. + C + C THE APPROXIMATE RELATIVE ERROR IN THE MAGNITUDE OF A COMPLEX + C BESSEL FUNCTION CAN BE EXPRESSED BY P*10**S WHERE P=MAX(UNIT + C ROUNDOFF,1.0E-18) IS THE NOMINAL PRECISION AND 10**S REPRE- + C SENTS THE INCREASE IN ERROR DUE TO ARGUMENT REDUCTION IN THE + C ELEMENTARY FUNCTIONS. HERE, S=MAX(1,ABS(LOG10(CABS(Z))), + C ABS(LOG10(FNU))) APPROXIMATELY (I.E. S=MAX(1,ABS(EXPONENT OF + C CABS(Z),ABS(EXPONENT OF FNU)) ). HOWEVER, THE PHASE ANGLE MAY + C HAVE ONLY ABSOLUTE ACCURACY. THIS IS MOST LIKELY TO OCCUR WHEN + C ONE COMPONENT (IN ABSOLUTE VALUE) IS LARGER THAN THE OTHER BY + C SEVERAL ORDERS OF MAGNITUDE. if ONE COMPONENT IS 10**K LARGER + C THAN THE OTHER, THEN ONE CAN EXPECT ONLY MAX(ABS(LOG10(P))-K, + C 0) SIGNIFICANT DIGITS; OR, STATED ANOTHER WAY, WHEN K EXCEEDS + C THE EXPONENT OF P, NO SIGNIFICANT DIGITS REMAIN IN THE SMALLER + C COMPONENT. HOWEVER, THE PHASE ANGLE RETAINS ABSOLUTE ACCURACY + C BECAUSE, IN COMPLEX ARITHMETIC WITH PRECISION P, THE SMALLER + C COMPONENT WILL NOT (AS A RULE) DECREASE BELOW P TIMES THE + C MAGNITUDE OF THE LARGER COMPONENT. IN THESE EXTREME CASES, + C THE PRINCIPAL PHASE ANGLE IS ON THE ORDER OF +P, -P, PI/2-P, + C OR -PI/2+P. + C + C***REFERENCES HANDBOOK OF MATHEMATICAL FUNCTIONS BY M. ABRAMOWITZ + C AND I. A. STEGUN, NBS AMS SERIES 55, U.S. DEPT. OF + C COMMERCE, 1955. + C + C COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT + C AND LARGE ORDER BY D. E. AMOS, SAND83-0643, MAY, 1983 + C + C A SUBROUTINE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX + C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, SAND85- + C 1018, MAY, 1985 + C + C A PORTABLE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX + C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, TRANS. + C MATH. SOFTWARE, 1986 + */ + var AI, CONE, CSQ, CY, S1, S2, TRM1, TRM2, Z, ZTA, Z3 complex128 + var AA, AD, AK, ALIM, ATRM, AZ, AZ3, BK, + CC, CK, COEF, CONEI, CONER, CSQI, CSQR, C1, C2, DIG, + DK, D1, D2, ELIM, FID, FNU, PTR, RL, R1M5, SFAC, STI, STR, + S1I, S1R, S2I, S2R, TOL, TRM1I, TRM1R, TRM2I, TRM2R, TTH, ZEROI, + ZEROR, ZTAI, ZTAR, Z3I, Z3R, ALAZ, BB float64 + var IFLAG, K, K1, K2, MR, NN int + var tmp complex128 + + // Extra element for padding. + CYR := []float64{math.NaN(), 0} + CYI := []float64{math.NaN(), 0} + + _ = AI + _ = CONE + _ = CSQ + _ = CY + _ = S1 + _ = S2 + _ = TRM1 + _ = TRM2 + _ = Z + _ = ZTA + _ = Z3 + + TTH = 6.66666666666666667e-01 + C1 = 3.55028053887817240e-01 + C2 = 2.58819403792806799e-01 + COEF = 1.83776298473930683e-01 + ZEROR = 0 + ZEROI = 0 + CONER = 1 + CONEI = 0 + + NZ = 0 + if ID < 0 || ID > 1 { + IERR = 1 + } + if KODE < 1 || KODE > 2 { + IERR = 1 + } + if IERR != 0 { + return + } + AZ = cmplx.Abs(complex(ZR, ZI)) + TOL = math.Max(dmach[4], 1.0e-18) + FID = float64(ID) + if AZ > 1.0e0 { + goto Seventy + } + + // POWER SERIES FOR CABS(Z)<=1. 
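+	// S1 and S2 accumulate the two power series term by term; the loop stops
+	// once the next term drops below TOL times the running denominator.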
+ S1R = CONER + S1I = CONEI + S2R = CONER + S2I = CONEI + if AZ < TOL { + goto OneSeventy + } + AA = AZ * AZ + if AA < TOL/AZ { + goto Forty + } + TRM1R = CONER + TRM1I = CONEI + TRM2R = CONER + TRM2I = CONEI + ATRM = 1.0e0 + STR = ZR*ZR - ZI*ZI + STI = ZR*ZI + ZI*ZR + Z3R = STR*ZR - STI*ZI + Z3I = STR*ZI + STI*ZR + AZ3 = AZ * AA + AK = 2.0e0 + FID + BK = 3.0e0 - FID - FID + CK = 4.0e0 - FID + DK = 3.0e0 + FID + FID + D1 = AK * DK + D2 = BK * CK + AD = math.Min(D1, D2) + AK = 24.0e0 + 9.0e0*FID + BK = 30.0e0 - 9.0e0*FID + for K = 1; K <= 25; K++ { + STR = (TRM1R*Z3R - TRM1I*Z3I) / D1 + TRM1I = (TRM1R*Z3I + TRM1I*Z3R) / D1 + TRM1R = STR + S1R = S1R + TRM1R + S1I = S1I + TRM1I + STR = (TRM2R*Z3R - TRM2I*Z3I) / D2 + TRM2I = (TRM2R*Z3I + TRM2I*Z3R) / D2 + TRM2R = STR + S2R = S2R + TRM2R + S2I = S2I + TRM2I + ATRM = ATRM * AZ3 / AD + D1 = D1 + AK + D2 = D2 + BK + AD = math.Min(D1, D2) + if ATRM < TOL*AD { + goto Forty + } + AK = AK + 18.0e0 + BK = BK + 18.0e0 + } +Forty: + if ID == 1 { + goto Fifty + } + AIR = S1R*C1 - C2*(ZR*S2R-ZI*S2I) + AII = S1I*C1 - C2*(ZR*S2I+ZI*S2R) + if KODE == 1 { + return + } + tmp = cmplx.Sqrt(complex(ZR, ZI)) + STR = real(tmp) + STI = imag(tmp) + ZTAR = TTH * (ZR*STR - ZI*STI) + ZTAI = TTH * (ZR*STI + ZI*STR) + tmp = cmplx.Exp(complex(ZTAR, ZTAI)) + STR = real(tmp) + STI = imag(tmp) + PTR = AIR*STR - AII*STI + AII = AIR*STI + AII*STR + AIR = PTR + return + +Fifty: + AIR = -S2R * C2 + AII = -S2I * C2 + if AZ <= TOL { + goto Sixty + } + STR = ZR*S1R - ZI*S1I + STI = ZR*S1I + ZI*S1R + CC = C1 / (1.0e0 + FID) + AIR = AIR + CC*(STR*ZR-STI*ZI) + AII = AII + CC*(STR*ZI+STI*ZR) + +Sixty: + if KODE == 1 { + return + } + tmp = cmplx.Sqrt(complex(ZR, ZI)) + STR = real(tmp) + STI = imag(tmp) + ZTAR = TTH * (ZR*STR - ZI*STI) + ZTAI = TTH * (ZR*STI + ZI*STR) + tmp = cmplx.Exp(complex(ZTAR, ZTAI)) + STR = real(tmp) + STI = imag(tmp) + PTR = STR*AIR - STI*AII + AII = STR*AII + STI*AIR + AIR = PTR + return + + // CASE FOR CABS(Z)>1.0. +Seventy: + FNU = (1.0e0 + FID) / 3.0e0 + + /* + SET PARAMETERS RELATED TO MACHINE CONSTANTS. + TOL IS THE APPROXIMATE UNIT ROUNDOFF LIMITED TO 1.0D-18. + ELIM IS THE APPROXIMATE EXPONENTIAL OVER-&&UNDERFLOW LIMIT. + EXP(-ELIM)EXP(ALIM)=EXP(ELIM)*TOL ARE INTERVALS NEAR + UNDERFLOW&&OVERFLOW LIMITS WHERE SCALED ARITHMETIC IS DONE. + RL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC EXPANSION FOR LA>=Z. + DIG = NUMBER OF BASE 10 DIGITS IN TOL = 10**(-DIG). + */ + K1 = imach[15] + K2 = imach[16] + R1M5 = dmach[5] + + K = min(abs(K1), abs(K2)) + ELIM = 2.303e0 * (float64(K)*R1M5 - 3.0e0) + K1 = imach[14] - 1 + AA = R1M5 * float64(K1) + DIG = math.Min(AA, 18.0e0) + AA = AA * 2.303e0 + ALIM = ELIM + math.Max(-AA, -41.45e0) + RL = 1.2e0*DIG + 3.0e0 + ALAZ = math.Log(AZ) + + // TEST FOR PROPER RANGE. + AA = 0.5e0 / TOL + BB = float64(float32(imach[9])) * 0.5e0 + AA = math.Min(AA, BB) + AA = math.Pow(AA, TTH) + if AZ > AA { + goto TwoSixty + } + AA = math.Sqrt(AA) + if AZ > AA { + IERR = 3 + } + tmp = cmplx.Sqrt(complex(ZR, ZI)) + CSQR = real(tmp) + CSQI = imag(tmp) + ZTAR = TTH * (ZR*CSQR - ZI*CSQI) + ZTAI = TTH * (ZR*CSQI + ZI*CSQR) + + // RE(ZTA)<=0 WHEN RE(Z)<0, ESPECIALLY WHEN IM(Z) IS SMALL. + IFLAG = 0 + SFAC = 1.0e0 + AK = ZTAI + if ZR >= 0.0e0 { + goto Eighty + } + BK = ZTAR + CK = -math.Abs(BK) + ZTAR = CK + ZTAI = AK + +Eighty: + if ZI != 0.0e0 { + goto Ninety + } + if ZR > 0.0e0 { + goto Ninety + } + ZTAR = 0.0e0 + ZTAI = AK +Ninety: + AA = ZTAR + if AA >= 0.0e0 && ZR > 0.0e0 { + goto OneTen + } + if KODE == 2 { + goto OneHundred + } + + // OVERFLOW TEST. 
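+	// If the result risks overflow, the computation proceeds with scaled
+	// arithmetic (IFLAG=1, SFAC=TOL); IERR=2 is returned below when even
+	// scaling cannot represent the result.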
+ if AA > (-ALIM) { + goto OneHundred + } + AA = -AA + 0.25e0*ALAZ + IFLAG = 1 + SFAC = TOL + if AA > ELIM { + goto TwoSeventy + } + +OneHundred: + // CBKNU AND CACON return EXP(ZTA)*K(FNU,ZTA) ON KODE=2. + MR = 1 + if ZI < 0.0e0 { + MR = -1 + } + _, _, _, _, _, _, CYR, CYI, NN, _, _, _, _ = Zacai(ZTAR, ZTAI, FNU, KODE, MR, 1, CYR, CYI, RL, TOL, ELIM, ALIM) + if NN < 0 { + goto TwoEighty + } + NZ = NZ + NN + goto OneThirty + +OneTen: + if KODE == 2 { + goto OneTwenty + } + + // UNDERFLOW TEST. + if AA < ALIM { + goto OneTwenty + } + AA = -AA - 0.25e0*ALAZ + IFLAG = 2 + SFAC = 1.0e0 / TOL + if AA < (-ELIM) { + goto TwoTen + } +OneTwenty: + _, _, _, _, _, CYR, CYI, NZ, _, _, _ = Zbknu(ZTAR, ZTAI, FNU, KODE, 1, CYR, CYI, TOL, ELIM, ALIM) + +OneThirty: + S1R = CYR[1] * COEF + S1I = CYI[1] * COEF + if IFLAG != 0 { + goto OneFifty + } + if ID == 1 { + goto OneFourty + } + AIR = CSQR*S1R - CSQI*S1I + AII = CSQR*S1I + CSQI*S1R + return +OneFourty: + AIR = -(ZR*S1R - ZI*S1I) + AII = -(ZR*S1I + ZI*S1R) + return +OneFifty: + S1R = S1R * SFAC + S1I = S1I * SFAC + if ID == 1 { + goto OneSixty + } + STR = S1R*CSQR - S1I*CSQI + S1I = S1R*CSQI + S1I*CSQR + S1R = STR + AIR = S1R / SFAC + AII = S1I / SFAC + return +OneSixty: + STR = -(S1R*ZR - S1I*ZI) + S1I = -(S1R*ZI + S1I*ZR) + S1R = STR + AIR = S1R / SFAC + AII = S1I / SFAC + return +OneSeventy: + AA = 1.0e+3 * dmach[1] + S1R = ZEROR + S1I = ZEROI + if ID == 1 { + goto OneNinety + } + if AZ <= AA { + goto OneEighty + } + S1R = C2 * ZR + S1I = C2 * ZI +OneEighty: + AIR = C1 - S1R + AII = -S1I + return +OneNinety: + AIR = -C2 + AII = 0.0e0 + AA = math.Sqrt(AA) + if AZ <= AA { + goto TwoHundred + } + S1R = 0.5e0 * (ZR*ZR - ZI*ZI) + S1I = ZR * ZI +TwoHundred: + AIR = AIR + C1*S1R + AII = AII + C1*S1I + return +TwoTen: + NZ = 1 + AIR = ZEROR + AII = ZEROI + return +TwoSeventy: + NZ = 0 + IERR = 2 + return +TwoEighty: + if NN == (-1) { + goto TwoSeventy + } + NZ = 0 + IERR = 5 + return +TwoSixty: + IERR = 4 + NZ = 0 + return +} + +// sbknu computes the k bessel function in the right half z plane. +func Zbknu(ZR, ZI, FNU float64, KODE, N int, YR, YI []float64, TOL, ELIM, ALIM float64) (ZRout, ZIout, FNUout float64, KODEout, Nout int, YRout, YIout []float64, NZ int, TOLout, ELIMout, ALIMout float64) { + /* Old dimension comment. + DIMENSION YR(N), YI(N), CC(8), CSSR(3), CSRR(3), BRY(3), CYR(2), + * CYI(2) + */ + + // TODO(btracey): Find which of these are inputs/outputs/both and clean up + // the function call. 
+ // YR and YI have length n (but n+1 with better indexing) + var AA, AK, ASCLE, A1, A2, BB, BK, CAZ, + CBI, CBR, CCHI, CCHR, CKI, CKR, COEFI, COEFR, CONEI, CONER, + CRSCR, CSCLR, CSHI, CSHR, CSI, CSR, CTWOR, + CZEROI, CZEROR, CZI, CZR, DNU, DNU2, DPI, ETEST, FC, FHS, + FI, FK, FKS, FMUI, FMUR, FPI, FR, G1, G2, HPI, PI, PR, PTI, + PTR, P1I, P1R, P2I, P2M, P2R, QI, QR, RAK, RCAZ, RTHPI, RZI, + RZR, R1, S, SMUI, SMUR, SPI, STI, STR, S1I, S1R, S2I, S2R, TM, + TTH, T1, T2, ELM, CELMR, ZDR, ZDI, AS, ALAS, HELIM float64 + + var I, IFLAG, INU, K, KFLAG, KK, KMAX, KODED, IDUM, J, IC, INUB, NW int + + var sinh, cosh complex128 + //var sin, cos float64 + + var tmp, p complex128 + var CSSR, CSRR, BRY [4]float64 + var CYR, CYI [3]float64 + + KMAX = 30 + CZEROR = 0 + CZEROI = 0 + CONER = 1 + CONEI = 0 + CTWOR = 2 + R1 = 2 + + DPI = 3.14159265358979324e0 + RTHPI = 1.25331413731550025e0 + SPI = 1.90985931710274403e0 + HPI = 1.57079632679489662e0 + FPI = 1.89769999331517738e0 + TTH = 6.66666666666666666e-01 + + CC := [9]float64{math.NaN(), 5.77215664901532861e-01, -4.20026350340952355e-02, + -4.21977345555443367e-02, 7.21894324666309954e-03, + -2.15241674114950973e-04, -2.01348547807882387e-05, + 1.13302723198169588e-06, 6.11609510448141582e-09} + + CAZ = cmplx.Abs(complex(ZR, ZI)) + CSCLR = 1.0e0 / TOL + CRSCR = TOL + CSSR[1] = CSCLR + CSSR[2] = 1.0e0 + CSSR[3] = CRSCR + CSRR[1] = CRSCR + CSRR[2] = 1.0e0 + CSRR[3] = CSCLR + BRY[1] = 1.0e+3 * dmach[1] / TOL + BRY[2] = 1.0e0 / BRY[1] + BRY[3] = dmach[2] + IFLAG = 0 + KODED = KODE + RCAZ = 1.0e0 / CAZ + STR = ZR * RCAZ + STI = -ZI * RCAZ + RZR = (STR + STR) * RCAZ + RZI = (STI + STI) * RCAZ + INU = int(float32(FNU + 0.5)) + DNU = FNU - float64(INU) + if math.Abs(DNU) == 0.5e0 { + goto OneTen + } + DNU2 = 0.0e0 + if math.Abs(DNU) > TOL { + DNU2 = DNU * DNU + } + if CAZ > R1 { + goto OneTen + } + + // SERIES FOR CABS(Z)<=R1. + FC = 1.0e0 + tmp = cmplx.Log(complex(RZR, RZI)) + SMUR = real(tmp) + SMUI = imag(tmp) + FMUR = SMUR * DNU + FMUI = SMUI * DNU + tmp = complex(FMUR, FMUI) + sinh = cmplx.Sinh(tmp) + cosh = cmplx.Cosh(tmp) + CSHR = real(sinh) + CSHI = imag(sinh) + CCHR = real(cosh) + CCHI = imag(cosh) + if DNU == 0.0e0 { + goto Ten + } + FC = DNU * DPI + FC = FC / math.Sin(FC) + SMUR = CSHR / DNU + SMUI = CSHI / DNU +Ten: + A2 = 1.0e0 + DNU + + // GAM(1-Z)*GAM(1+Z)=PI*Z/SIN(PI*Z), T1=1/GAM(1-DNU), T2=1/GAM(1+DNU). + T2 = math.Exp(-dgamln(A2, IDUM)) + T1 = 1.0e0 / (T2 * FC) + if math.Abs(DNU) > 0.1e0 { + goto Forty + } + + // SERIES FOR F0 TO RESOLVE INDETERMINACY FOR SMALL ABS(DNU). + AK = 1.0e0 + S = CC[1] + for K = 2; K <= 8; K++ { + AK = AK * DNU2 + TM = CC[K] * AK + S = S + TM + if math.Abs(TM) < TOL { + goto Thirty + } + } +Thirty: + G1 = -S + goto Fifty +Forty: + G1 = (T1 - T2) / (DNU + DNU) +Fifty: + G2 = (T1 + T2) * 0.5e0 + FR = FC * (CCHR*G1 + SMUR*G2) + FI = FC * (CCHI*G1 + SMUI*G2) + tmp = cmplx.Exp(complex(FMUR, FMUI)) + STR = real(tmp) + STI = imag(tmp) + PR = 0.5e0 * STR / T2 + PI = 0.5e0 * STI / T2 + tmp = complex(0.5, 0) / complex(STR, STI) + PTR = real(tmp) + PTI = imag(tmp) + QR = PTR / T1 + QI = PTI / T1 + S1R = FR + S1I = FI + S2R = PR + S2I = PI + AK = 1.0e0 + A1 = 1.0e0 + CKR = CONER + CKI = CONEI + BK = 1.0e0 - DNU2 + if INU > 0 || N > 1 { + goto Eighty + } + + // GENERATE K(FNU,Z), 0.0E0 <= FNU < 0.5E0 AND N=1. 
+ if CAZ < TOL { + goto Seventy + } + tmp = complex(ZR, ZI) * complex(ZR, ZI) + CZR = real(tmp) + CZI = imag(tmp) + CZR = 0.25e0 * CZR + CZI = 0.25e0 * CZI + T1 = 0.25e0 * CAZ * CAZ +Sixty: + FR = (FR*AK + PR + QR) / BK + FI = (FI*AK + PI + QI) / BK + STR = 1.0e0 / (AK - DNU) + PR = PR * STR + PI = PI * STR + STR = 1.0e0 / (AK + DNU) + QR = QR * STR + QI = QI * STR + STR = CKR*CZR - CKI*CZI + RAK = 1.0e0 / AK + CKI = (CKR*CZI + CKI*CZR) * RAK + CKR = STR * RAK + S1R = CKR*FR - CKI*FI + S1R + S1I = CKR*FI + CKI*FR + S1I + A1 = A1 * T1 * RAK + BK = BK + AK + AK + 1.0e0 + AK = AK + 1.0e0 + if A1 > TOL { + goto Sixty + } +Seventy: + YR[1] = S1R + YI[1] = S1I + if KODED == 1 { + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL, ELIM, ALIM + } + tmp = cmplx.Exp(complex(ZR, ZI)) + STR = real(tmp) + STI = imag(tmp) + tmp = complex(S1R, S1I) * complex(STR, STI) + YR[1] = real(tmp) + YI[1] = imag(tmp) + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL, ELIM, ALIM + + // GENERATE K(DNU,Z) AND K(DNU+1,Z) FOR FORWARD RECURRENCE. +Eighty: + if CAZ < TOL { + goto OneHundred + } + tmp = complex(ZR, ZI) * complex(ZR, ZI) + CZR = real(tmp) + CZI = imag(tmp) + CZR = 0.25e0 * CZR + CZI = 0.25e0 * CZI + T1 = 0.25e0 * CAZ * CAZ +Ninety: + FR = (FR*AK + PR + QR) / BK + FI = (FI*AK + PI + QI) / BK + STR = 1.0e0 / (AK - DNU) + PR = PR * STR + PI = PI * STR + STR = 1.0e0 / (AK + DNU) + QR = QR * STR + QI = QI * STR + STR = CKR*CZR - CKI*CZI + RAK = 1.0e0 / AK + CKI = (CKR*CZI + CKI*CZR) * RAK + CKR = STR * RAK + S1R = CKR*FR - CKI*FI + S1R + S1I = CKR*FI + CKI*FR + S1I + STR = PR - FR*AK + STI = PI - FI*AK + S2R = CKR*STR - CKI*STI + S2R + S2I = CKR*STI + CKI*STR + S2I + A1 = A1 * T1 * RAK + BK = BK + AK + AK + 1.0e0 + AK = AK + 1.0e0 + if A1 > TOL { + goto Ninety + } +OneHundred: + KFLAG = 2 + A1 = FNU + 1.0e0 + AK = A1 * math.Abs(SMUR) + if AK > ALIM { + KFLAG = 3 + } + STR = CSSR[KFLAG] + P2R = S2R * STR + P2I = S2I * STR + tmp = complex(P2R, P2I) * complex(RZR, RZI) + S2R = real(tmp) + S2I = imag(tmp) + S1R = S1R * STR + S1I = S1I * STR + if KODED == 1 { + goto TwoTen + } + tmp = cmplx.Exp(complex(ZR, ZI)) + FR = real(tmp) + FI = imag(tmp) + tmp = complex(S1R, S1I) * complex(FR, FI) + S1R = real(tmp) + S1I = imag(tmp) + tmp = complex(S2R, S2I) * complex(FR, FI) + S2R = real(tmp) + S2I = imag(tmp) + goto TwoTen + + // IFLAG=0 MEANS NO UNDERFLOW OCCURRED + // IFLAG=1 MEANS AN UNDERFLOW OCCURRED- COMPUTATION PROCEEDS WITH + // KODED=2 AND A TEST FOR ON SCALE VALUES IS MADE DURING FORWARD RECURSION +OneTen: + tmp = cmplx.Sqrt(complex(ZR, ZI)) + STR = real(tmp) + STI = imag(tmp) + tmp = complex(RTHPI, CZEROI) / complex(STR, STI) + COEFR = real(tmp) + COEFI = imag(tmp) + KFLAG = 2 + if KODED == 2 { + goto OneTwenty + } + if ZR > ALIM { + goto TwoNinety + } + + STR = math.Exp(-ZR) * CSSR[KFLAG] + //sin, cos = math.Sincos(ZI) + STI = -STR * math.Sin(ZI) + STR = STR * math.Cos(ZI) + tmp = complex(COEFR, COEFI) * complex(STR, STI) + COEFR = real(tmp) + COEFI = imag(tmp) +OneTwenty: + if math.Abs(DNU) == 0.5e0 { + goto ThreeHundred + } + // MILLER ALGORITHM FOR CABS(Z)>R1. + AK = math.Cos(DPI * DNU) + AK = math.Abs(AK) + if AK == CZEROR { + goto ThreeHundred + } + FHS = math.Abs(0.25e0 - DNU2) + if FHS == CZEROR { + goto ThreeHundred + } + + // COMPUTE R2=F(E). if CABS(Z)>=R2, USE FORWARD RECURRENCE TO + // DETERMINE THE BACKWARD INDEX K. R2=F(E) IS A STRAIGHT LINE ON + // 12<=E<=60. E IS COMPUTED FROM 2**(-E)=B**(1-I1MACH(14))= + // TOL WHERE B IS THE BASE OF THE ARITHMETIC. 
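+	// For IEEE double precision (imach[14] = 53 significand bits and
+	// dmach[5] = log10(2)), T1 works out to 52, inside the clamp to [12, 60]
+	// below, giving a crossover radius of T2 = (2/3)*52 - 6, approximately
+	// 28.7.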
+	T1 = float64(imach[14] - 1)
+	T1 = T1 * dmach[5] * 3.321928094e0
+	T1 = math.Max(T1, 12.0e0)
+	T1 = math.Min(T1, 60.0e0)
+	T2 = TTH*T1 - 6.0e0
+	if ZR != 0.0e0 {
+		goto OneThirty
+	}
+	T1 = HPI
+	goto OneFourty
+OneThirty:
+	T1 = math.Atan(ZI / ZR)
+	T1 = math.Abs(T1)
+OneFourty:
+	if T2 > CAZ {
+		goto OneSeventy
+	}
+	// FORWARD RECURRENCE LOOP WHEN CABS(Z)>=R2.
+	ETEST = AK / (DPI * CAZ * TOL)
+	FK = CONER
+	if ETEST < CONER {
+		goto OneEighty
+	}
+	FKS = CTWOR
+	CKR = CAZ + CAZ + CTWOR
+	P1R = CZEROR
+	P2R = CONER
+	for I = 1; I <= KMAX; I++ {
+		AK = FHS / FKS
+		CBR = CKR / (FK + CONER)
+		PTR = P2R
+		P2R = CBR*P2R - P1R*AK
+		P1R = PTR
+		CKR = CKR + CTWOR
+		FKS = FKS + FK + FK + CTWOR
+		FHS = FHS + FK + FK
+		FK = FK + CONER
+		STR = math.Abs(P2R) * FK
+		if ETEST < STR {
+			goto OneSixty
+		}
+	}
+	goto ThreeTen
+OneSixty:
+	FK = FK + SPI*T1*math.Sqrt(T2/CAZ)
+	FHS = math.Abs(0.25 - DNU2)
+	goto OneEighty
+OneSeventy:
+	// COMPUTE BACKWARD INDEX K FOR CABS(Z)<R2.
+	A2 = math.Sqrt(CAZ)
+	AK = FPI * AK / (TOL * math.Sqrt(A2))
+	AA = 3.0e0 * T1 / (1.0e0 + CAZ)
+	BB = 14.7e0 * T1 / (28.0e0 + CAZ)
+	AK = (math.Log(AK) + CAZ*math.Cos(AA)/(1.0e0+0.008e0*CAZ)) / math.Cos(BB)
+	FK = 0.12125e0*AK*AK/CAZ + 1.5e0
+OneEighty:
+	// BACKWARD RECURRENCE LOOP FOR MILLER ALGORITHM.
+	K = int(float32(FK))
+	FK = float64(float32(K))
+	FKS = FK * FK
+	P1R = CZEROR
+	P1I = CZEROI
+	P2R = TOL
+	P2I = CZEROI
+	CSR = P2R
+	CSI = P2I
+	for I = 1; I <= K; I++ {
+		A1 = FKS - FK
+		AK = (FKS + FK) / (A1 + FHS)
+		RAK = 2.0e0 / (FK + CONER)
+		CBR = (FK + ZR) * RAK
+		CBI = ZI * RAK
+		PTR = P2R
+		PTI = P2I
+		P2R = (PTR*CBR - PTI*CBI - P1R) * AK
+		P2I = (PTI*CBR + PTR*CBI - P1I) * AK
+		P1R = PTR
+		P1I = PTI
+		CSR = CSR + P2R
+		CSI = CSI + P2I
+		FKS = A1 - FK + CONER
+		FK = FK - CONER
+	}
+
+	// COMPUTE (P2/CS)=(P2/CABS(CS))*(CONJG(CS)/CABS(CS)) FOR BETTER SCALING.
+	TM = cmplx.Abs(complex(CSR, CSI))
+	PTR = 1.0e0 / TM
+	S1R = P2R * PTR
+	S1I = P2I * PTR
+	CSR = CSR * PTR
+	CSI = -CSI * PTR
+	tmp = complex(COEFR, COEFI) * complex(S1R, S1I)
+	STR = real(tmp)
+	STI = imag(tmp)
+	tmp = complex(STR, STI) * complex(CSR, CSI)
+	S1R = real(tmp)
+	S1I = imag(tmp)
+	if INU > 0 || N > 1 {
+		goto TwoHundred
+	}
+	ZDR = ZR
+	ZDI = ZI
+	if IFLAG == 1 {
+		goto TwoSeventy
+	}
+	goto TwoFourty
+TwoHundred:
+	// COMPUTE P1/P2=(P1/CABS(P2))*(CONJG(P2)/CABS(P2)) FOR SCALING.
+	TM = cmplx.Abs(complex(P2R, P2I))
+	PTR = 1.0e0 / TM
+	P1R = P1R * PTR
+	P1I = P1I * PTR
+	P2R = P2R * PTR
+	P2I = -P2I * PTR
+	tmp = complex(P1R, P1I) * complex(P2R, P2I)
+	PTR = real(tmp)
+	PTI = imag(tmp)
+	STR = DNU + 0.5e0 - PTR
+	STI = -PTI
+	tmp = complex(STR, STI) / complex(ZR, ZI)
+	STR = real(tmp)
+	STI = imag(tmp)
+	STR = STR + 1.0e0
+	tmp = complex(STR, STI) * complex(S1R, S1I)
+	S2R = real(tmp)
+	S2I = imag(tmp)
+
+	// FORWARD RECURSION ON THE THREE TERM RECURSION RELATION WITH
+	// SCALING NEAR EXPONENT EXTREMES ON KFLAG=1 OR KFLAG=3
+TwoTen:
+	STR = DNU + 1.0e0
+	CKR = STR * RZR
+	CKI = STR * RZI
+	if N == 1 {
+		INU = INU - 1
+	}
+	if INU > 0 {
+		goto TwoTwenty
+	}
+	if N > 1 {
+		goto TwoFifteen
+	}
+	S1R = S2R
+	S1I = S2I
+TwoFifteen:
+	ZDR = ZR
+	ZDI = ZI
+	if IFLAG == 1 {
+		goto TwoSeventy
+	}
+	goto TwoFourty
+TwoTwenty:
+	INUB = 1
+	if IFLAG == 1 {
+		goto TwoSixtyOne
+	}
+TwoTwentyFive:
+	P1R = CSRR[KFLAG]
+	ASCLE = BRY[KFLAG]
+	for I = INUB; I <= INU; I++ {
+		STR = S2R
+		STI = S2I
+		S2R = CKR*STR - CKI*STI + S1R
+		S2I = CKR*STI + CKI*STR + S1I
+		S1R = STR
+		S1I = STI
+		CKR = CKR + RZR
+		CKI = CKI + RZI
+		if KFLAG >= 3 {
+			continue
+		}
+		P2R = S2R * P1R
+		P2I = S2I * P1R
+		STR = math.Abs(P2R)
+		STI = math.Abs(P2I)
+		P2M = math.Max(STR, STI)
+		if P2M <= ASCLE {
+			continue
+		}
+		KFLAG = KFLAG + 1
+		ASCLE = BRY[KFLAG]
+		S1R = S1R * P1R
+		S1I = S1I * P1R
+		S2R = P2R
+		S2I = P2I
+		STR = CSSR[KFLAG]
+		S1R = S1R * STR
+		S1I = S1I * STR
+		S2R = S2R * STR
+		S2I = S2I * STR
+		P1R = CSRR[KFLAG]
+	}
+	if N != 1 {
+		goto TwoFourty
+	}
+	S1R = S2R
+	S1I = S2I
+TwoFourty:
+	STR = CSRR[KFLAG]
+	YR[1] = S1R * STR
+	YI[1] = S1I * STR
+	if N == 1 {
+		return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL, ELIM, ALIM
+	}
+	YR[2] = S2R * STR
+	YI[2] = S2I * STR
+	if N == 2 {
+		return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL, ELIM, ALIM
+	}
+	KK = 2
+TwoFifty:
+	KK = KK + 1
+	if KK > N {
+		return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL, ELIM, ALIM
+	}
+	P1R = CSRR[KFLAG]
+	ASCLE = BRY[KFLAG]
+	for I = KK; I <= N; I++ {
+		P2R = S2R
+		P2I = S2I
+		S2R = CKR*P2R - CKI*P2I + S1R
+		S2I = CKI*P2R + CKR*P2I + S1I
+		S1R = P2R
+		S1I = P2I
+		CKR = CKR + RZR
+		CKI = CKI + RZI
+		P2R = S2R * P1R
+		P2I = S2I * P1R
+		YR[I] = P2R
+		YI[I] = P2I
+		if KFLAG >= 3 {
+			continue
+		}
+		STR = math.Abs(P2R)
+		STI = math.Abs(P2I)
+		P2M = math.Max(STR, STI)
+		if P2M <= ASCLE {
+
continue + } + KFLAG = KFLAG + 1 + ASCLE = BRY[KFLAG] + S1R = S1R * P1R + S1I = S1I * P1R + S2R = P2R + S2I = P2I + STR = CSSR[KFLAG] + S1R = S1R * STR + S1I = S1I * STR + S2R = S2R * STR + S2I = S2I * STR + P1R = CSRR[KFLAG] + } + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL, ELIM, ALIM + + // IFLAG=1 CASES, FORWARD RECURRENCE ON SCALED VALUES ON UNDERFLOW. +TwoSixtyOne: + HELIM = 0.5e0 * ELIM + ELM = math.Exp(-ELIM) + CELMR = ELM + ASCLE = BRY[1] + ZDR = ZR + ZDI = ZI + IC = -1 + J = 2 + for I = 1; I <= INU; I++ { + STR = S2R + STI = S2I + S2R = STR*CKR - STI*CKI + S1R + S2I = STI*CKR + STR*CKI + S1I + S1R = STR + S1I = STI + CKR = CKR + RZR + CKI = CKI + RZI + AS = cmplx.Abs(complex(S2R, S2I)) + ALAS = math.Log(AS) + P2R = -ZDR + ALAS + if P2R < (-ELIM) { + goto TwoSixtyThree + } + tmp = cmplx.Log(complex(S2R, S2I)) + STR = real(tmp) + STI = imag(tmp) + P2R = -ZDR + STR + P2I = -ZDI + STI + P2M = math.Exp(P2R) / TOL + // sin, cos = math.Sincos(P2I) + P1R = P2M * math.Cos(P2I) + P1I = P2M * math.Sin(P2I) + p = complex(P1R, P1I) + NW = Zuchk(p, ASCLE, TOL) + if NW != 0 { + goto TwoSixtyThree + } + J = 3 - J + CYR[J] = P1R + CYI[J] = P1I + if IC == (I - 1) { + goto TwoSixtyFour + } + IC = I + continue + TwoSixtyThree: + if ALAS < HELIM { + continue + } + ZDR = ZDR - ELIM + S1R = S1R * CELMR + S1I = S1I * CELMR + S2R = S2R * CELMR + S2I = S2I * CELMR + } + if N != 1 { + goto TwoSeventy + } + S1R = S2R + S1I = S2I + goto TwoSeventy +TwoSixtyFour: + KFLAG = 1 + INUB = I + 1 + S2R = CYR[J] + S2I = CYI[J] + J = 3 - J + S1R = CYR[J] + S1I = CYI[J] + if INUB <= INU { + goto TwoTwentyFive + } + if N != 1 { + goto TwoFourty + } + S1R = S2R + S1I = S2I + goto TwoFourty +TwoSeventy: + YR[1] = S1R + YI[1] = S1I + if N == 1 { + goto TwoEighty + } + YR[2] = S2R + YI[2] = S2I +TwoEighty: + ASCLE = BRY[1] + _, _, FNU, N, YR, YI, NZ, RZR, RZI, _, TOL, ELIM = Zkscl(ZDR, ZDI, FNU, N, YR, YI, RZR, RZI, ASCLE, TOL, ELIM) + INU = N - NZ + if INU <= 0 { + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL, ELIM, ALIM + } + KK = NZ + 1 + S1R = YR[KK] + S1I = YI[KK] + YR[KK] = S1R * CSRR[1] + YI[KK] = S1I * CSRR[1] + if INU == 1 { + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL, ELIM, ALIM + } + KK = NZ + 2 + S2R = YR[KK] + S2I = YI[KK] + YR[KK] = S2R * CSRR[1] + YI[KK] = S2I * CSRR[1] + if INU == 2 { + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL, ELIM, ALIM + } + T2 = FNU + float64(float32(KK-1)) + CKR = T2 * RZR + CKI = T2 * RZI + KFLAG = 1 + goto TwoFifty +TwoNinety: + + // SCALE BY math.Exp(Z), IFLAG = 1 CASES. + + IFLAG = 1 + KFLAG = 2 + goto OneTwenty + + // FNU=HALF ODD INTEGER CASE, DNU=-0.5 +ThreeHundred: + S1R = COEFR + S1I = COEFI + S2R = COEFR + S2I = COEFI + goto TwoTen + +ThreeTen: + NZ = -2 + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL, ELIM, ALIM +} + +// SET K FUNCTIONS TO ZERO ON UNDERFLOW, CONTINUE RECURRENCE +// ON SCALED FUNCTIONS UNTIL TWO MEMBERS COME ON SCALE, THEN +// return WITH MIN(NZ+2,N) VALUES SCALED BY 1/TOL. 
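+//
+// NZ reports how many members of YR and YI were zeroed by underflow.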
+func Zkscl(ZRR, ZRI, FNU float64, N int, YR, YI []float64, RZR, RZI, ASCLE, TOL, ELIM float64) (
+	ZRRout, ZRIout, FNUout float64, Nout int, YRout, YIout []float64, NZ int, RZRout, RZIout, ASCLEout, TOLout, ELIMout float64) {
+	var ACS, AS, CKI, CKR, CSI, CSR, FN, STR, S1I, S1R, S2I,
+		S2R, ZEROI, ZEROR, ZDR, ZDI, CELMR, ELM, HELIM, ALAS float64
+
+	var I, IC, KK, NN, NW int
+	var tmp, c complex128
+	var CYR, CYI [3]float64
+	var sin, cos float64
+
+	// DIMENSION YR(N), YI(N), CYR(2), CYI(2)
+	ZEROR = 0
+	ZEROI = 0
+	IC = 0
+	NN = min(2, N)
+	for I = 1; I <= NN; I++ {
+		S1R = YR[I]
+		S1I = YI[I]
+		CYR[I] = S1R
+		CYI[I] = S1I
+		AS = cmplx.Abs(complex(S1R, S1I))
+		ACS = -ZRR + math.Log(AS)
+		NZ = NZ + 1
+		YR[I] = ZEROR
+		YI[I] = ZEROI
+		if ACS < (-ELIM) {
+			continue
+		}
+
+		tmp = cmplx.Log(complex(S1R, S1I))
+		CSR = real(tmp)
+		CSI = imag(tmp)
+		CSR = CSR - ZRR
+		CSI = CSI - ZRI
+		STR = math.Exp(CSR) / TOL
+		// sin, cos = math.Sincos(CSI)
+		CSR = STR * math.Cos(CSI)
+		CSI = STR * math.Sin(CSI)
+		c = complex(CSR, CSI)
+		NW = Zuchk(c, ASCLE, TOL)
+		if NW != 0 {
+			continue
+		}
+		YR[I] = CSR
+		YI[I] = CSI
+		IC = I
+		NZ = NZ - 1
+	}
+	if N == 1 {
+		return ZRR, ZRI, FNU, N, YR, YI, NZ, RZR, RZI, ASCLE, TOL, ELIM
+	}
+	if IC > 1 {
+		goto Twenty
+	}
+	YR[1] = ZEROR
+	YI[1] = ZEROI
+	NZ = 2
+Twenty:
+	if N == 2 {
+		return ZRR, ZRI, FNU, N, YR, YI, NZ, RZR, RZI, ASCLE, TOL, ELIM
+	}
+	if NZ == 0 {
+		return ZRR, ZRI, FNU, N, YR, YI, NZ, RZR, RZI, ASCLE, TOL, ELIM
+	}
+	FN = FNU + 1.0e0
+	CKR = FN * RZR
+	CKI = FN * RZI
+	S1R = CYR[1]
+	S1I = CYI[1]
+	S2R = CYR[2]
+	S2I = CYI[2]
+	HELIM = 0.5e0 * ELIM
+	ELM = math.Exp(-ELIM)
+	CELMR = ELM
+	ZDR = ZRR
+	ZDI = ZRI
+
+	// FIND TWO CONSECUTIVE Y VALUES ON SCALE. SCALE RECURRENCE IF
+	// S2 GETS LARGER THAN EXP(ELIM/2)
+	for I = 3; I <= N; I++ {
+		KK = I
+		CSR = S2R
+		CSI = S2I
+		S2R = CKR*CSR - CKI*CSI + S1R
+		S2I = CKI*CSR + CKR*CSI + S1I
+		S1R = CSR
+		S1I = CSI
+		CKR = CKR + RZR
+		CKI = CKI + RZI
+		AS = cmplx.Abs(complex(S2R, S2I))
+		ALAS = math.Log(AS)
+		ACS = -ZDR + ALAS
+		NZ = NZ + 1
+		YR[I] = ZEROR
+		YI[I] = ZEROI
+		if ACS < (-ELIM) {
+			goto TwentyFive
+		}
+		tmp = cmplx.Log(complex(S2R, S2I))
+		CSR = real(tmp)
+		CSI = imag(tmp)
+		CSR = CSR - ZDR
+		CSI = CSI - ZDI
+		STR = math.Exp(CSR) / TOL
+		sin, cos = math.Sincos(CSI)
+		CSR = STR * cos
+		CSI = STR * sin
+		c = complex(CSR, CSI)
+		NW = Zuchk(c, ASCLE, TOL)
+		if NW != 0 {
+			goto TwentyFive
+		}
+		YR[I] = CSR
+		YI[I] = CSI
+		NZ = NZ - 1
+		if IC == KK-1 {
+			goto Forty
+		}
+		IC = KK
+		continue
+	TwentyFive:
+		if ALAS < HELIM {
+			continue
+		}
+		ZDR = ZDR - ELIM
+		S1R = S1R * CELMR
+		S1I = S1I * CELMR
+		S2R = S2R * CELMR
+		S2I = S2I * CELMR
+	}
+	NZ = N
+	if IC == N {
+		NZ = N - 1
+	}
+	goto FourtyFive
+Forty:
+	NZ = KK - 2
+FourtyFive:
+	for I = 1; I <= NZ; I++ {
+		YR[I] = ZEROR
+		YI[I] = ZEROI
+	}
+	return ZRR, ZRI, FNU, N, YR, YI, NZ, RZR, RZI, ASCLE, TOL, ELIM
+}
+
+// Zuchk tests whether the magnitude of the real or imaginary part would
+// underflow when y is scaled by tol.
+//
+// y enters as a scaled quantity whose magnitude is greater than
+//
+//	1e3 * dmach[1] / tol
+//
+// y is accepted if the underflow is at least one precision below the magnitude
+// of the largest component. Otherwise an underflow is assumed as the phase angle
+// does not have sufficient accuracy.
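+//
+// For instance, with tol = 1e-14 and scale = 1e3*dmach[1]/tol, a value such
+// as complex(1e-300, 1e-305) is rejected (Zuchk returns 1): the smaller
+// component is below scale, yet within a factor 1/tol of the larger one, so
+// the phase would be inaccurate after rescaling.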
+func Zuchk(y complex128, scale, tol float64) int { + absR := math.Abs(real(y)) + absI := math.Abs(imag(y)) + minAbs := math.Min(absR, absI) + if minAbs > scale { + return 0 + } + maxAbs := math.Max(absR, absI) + minAbs /= tol + if maxAbs < minAbs { + return 1 + } + return 0 +} + +// ZACAI APPLIES THE ANALYTIC CONTINUATION FORMULA +// +// K(FNU,ZN*EXP(MP))=K(FNU,ZN)*EXP(-MP*FNU) - MP*I(FNU,ZN) +// MP=PI*MR*CMPLX(0.0,1.0) +// +// TO CONTINUE THE K FUNCTION FROM THE RIGHT HALF TO THE LEFT +// HALF Z PLANE FOR USE WITH ZAIRY WHERE FNU=1/3 OR 2/3 AND N=1. +// ZACAI IS THE SAME AS ZACON WITH THE PARTS FOR LARGER ORDERS AND +// RECURRENCE REMOVED. A RECURSIVE CALL TO ZACON CAN RESULT if ZACON +// IS CALLED FROM ZAIRY. +func Zacai(ZR, ZI, FNU float64, KODE, MR, N int, YR, YI []float64, RL, TOL, ELIM, ALIM float64) ( + ZRout, ZIout, FNUout float64, KODEout, MRout, Nout int, YRout, YIout []float64, NZ int, RLout, TOLout, ELIMout, ALIMout float64) { + var ARG, ASCLE, AZ, CSGNR, CSGNI, CSPNR, + CSPNI, C1R, C1I, C2R, C2I, DFNU, FMR, PI, + SGN, YY, ZNR, ZNI float64 + var INU, IUF, NN, NW int + var zn, c1, c2, z complex128 + var y []complex128 + //var sin, cos float64 + + CYR := []float64{math.NaN(), 0, 0} + CYI := []float64{math.NaN(), 0, 0} + + PI = math.Pi + ZNR = -ZR + ZNI = -ZI + AZ = cmplx.Abs(complex(ZR, ZI)) + NN = N + DFNU = FNU + float64(float32(N-1)) + if AZ <= 2.0e0 { + goto Ten + } + if AZ*AZ*0.25 > DFNU+1.0e0 { + goto Twenty + } +Ten: + // POWER SERIES FOR THE I FUNCTION. + z = complex(ZNR, ZNI) + y = make([]complex128, len(YR)) + for i, v := range YR { + y[i] = complex(v, YI[i]) + } + Zseri(z, FNU, KODE, NN, y[1:], TOL, ELIM, ALIM) + for i, v := range y { + YR[i] = real(v) + YI[i] = imag(v) + } + goto Forty +Twenty: + if AZ < RL { + goto Thirty + } + // ASYMPTOTIC EXPANSION FOR LARGE Z FOR THE I FUNCTION. + ZNR, ZNI, FNU, KODE, _, YR, YI, NW, RL, TOL, ELIM, ALIM = Zasyi(ZNR, ZNI, FNU, KODE, NN, YR, YI, RL, TOL, ELIM, ALIM) + if NW < 0 { + goto Eighty + } + goto Forty +Thirty: + // MILLER ALGORITHM NORMALIZED BY THE SERIES FOR THE I FUNCTION + ZNR, ZNI, FNU, KODE, _, YR, YI, NW, TOL = Zmlri(ZNR, ZNI, FNU, KODE, NN, YR, YI, TOL) + if NW < 0 { + goto Eighty + } +Forty: + // ANALYTIC CONTINUATION TO THE LEFT HALF PLANE FOR THE K FUNCTION. 
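+	// Zbknu supplies K(FNU, ZN) on the right half plane; below it is combined
+	// as CSPN*K(FNU,ZN) + CSGN*I(FNU,ZN), the continuation formula quoted in
+	// the function comment with MP = PI*MR*i.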
+ ZNR, ZNI, FNU, KODE, _, CYR, CYI, NW, TOL, ELIM, ALIM = Zbknu(ZNR, ZNI, FNU, KODE, 1, CYR, CYI, TOL, ELIM, ALIM) + if NW != 0 { + goto Eighty + } + FMR = float64(float32(MR)) + SGN = -math.Copysign(PI, FMR) + CSGNR = 0.0e0 + CSGNI = SGN + if KODE == 1 { + goto Fifty + } + YY = -ZNI + //sin, cos = math.Sincos(YY) + CSGNR = -CSGNI * math.Sin(YY) + CSGNI = CSGNI * math.Cos(YY) +Fifty: + // CALCULATE CSPN=EXP(FNU*PI*I) TO MINIMIZE LOSSES OF SIGNIFICANCE + // WHEN FNU IS LARGE + INU = int(float32(FNU)) + ARG = (FNU - float64(float32(INU))) * SGN + //sin, cos = math.Sincos(ARG) + CSPNR = math.Cos(ARG) + CSPNI = math.Sin(ARG) + if INU%2 == 0 { + goto Sixty + } + CSPNR = -CSPNR + CSPNI = -CSPNI +Sixty: + C1R = CYR[1] + C1I = CYI[1] + C2R = YR[1] + C2I = YI[1] + if KODE == 1 { + goto Seventy + } + IUF = 0 + ASCLE = 1.0e+3 * dmach[1] / TOL + zn = complex(ZNR, ZNI) + c1 = complex(C1R, C1I) + c2 = complex(C2R, C2I) + c1, c2, NW, _ = Zs1s2(zn, c1, c2, ASCLE, ALIM, IUF) + C1R = real(c1) + C1I = imag(c1) + C2R = real(c2) + C2I = imag(c2) + NZ = NZ + NW +Seventy: + YR[1] = CSPNR*C1R - CSPNI*C1I + CSGNR*C2R - CSGNI*C2I + YI[1] = CSPNR*C1I + CSPNI*C1R + CSGNR*C2I + CSGNI*C2R + return ZR, ZI, FNU, KODE, MR, N, YR, YI, NZ, RL, TOL, ELIM, ALIM +Eighty: + NZ = -1 + if NW == -2 { + NZ = -2 + } + return ZR, ZI, FNU, KODE, MR, N, YR, YI, NZ, RL, TOL, ELIM, ALIM +} + +// ZASYI COMPUTES THE I BESSEL FUNCTION FOR REAL(Z)>=0.0 BY +// MEANS OF THE ASYMPTOTIC EXPANSION FOR LARGE CABS(Z) IN THE +// REGION CABS(Z)>MAX(RL,FNU*FNU/2). NZ=0 IS A NORMAL return. +// NZ<0 INDICATES AN OVERFLOW ON KODE=1. +func Zasyi(ZR, ZI, FNU float64, KODE, N int, YR, YI []float64, RL, TOL, ELIM, ALIM float64) ( + ZRout, ZIout, FNUout float64, KODEout, Nout int, YRout, YIout []float64, NZ int, RLout, TOLout, ELIMout, ALIMout float64) { + var AA, AEZ, AK, AK1I, AK1R, ARG, ARM, ATOL, + AZ, BB, BK, CKI, CKR, CONEI, CONER, CS1I, CS1R, CS2I, CS2R, CZI, + CZR, DFNU, DKI, DKR, DNU2, EZI, EZR, FDN, PI, P1I, + P1R, RAZ, RTPI, RTR1, RZI, RZR, S, SGN, SQK, STI, STR, S2I, + S2R, TZI, TZR, ZEROI, ZEROR float64 + + var I, IB, IL, INU, J, JL, K, KODED, M, NN int + var tmp complex128 + // var sin, cos float64 + + PI = math.Pi + RTPI = 0.159154943091895336e0 + ZEROR = 0 + ZEROI = 0 + CONER = 1 + CONEI = 0 + + AZ = cmplx.Abs(complex(ZR, ZI)) + ARM = 1.0e3 * dmach[1] + RTR1 = math.Sqrt(ARM) + IL = min(2, N) + DFNU = FNU + float64(float32(N-IL)) + + // OVERFLOW TEST + RAZ = 1.0e0 / AZ + STR = ZR * RAZ + STI = -ZI * RAZ + AK1R = RTPI * STR * RAZ + AK1I = RTPI * STI * RAZ + tmp = cmplx.Sqrt(complex(AK1R, AK1I)) + AK1R = real(tmp) + AK1I = imag(tmp) + CZR = ZR + CZI = ZI + if KODE != 2 { + goto Ten + } + CZR = ZEROR + CZI = ZI +Ten: + if math.Abs(CZR) > ELIM { + goto OneHundred + } + DNU2 = DFNU + DFNU + KODED = 1 + if (math.Abs(CZR) > ALIM) && (N > 2) { + goto Twenty + } + KODED = 0 + tmp = cmplx.Exp(complex(CZR, CZI)) + STR = real(tmp) + STI = imag(tmp) + tmp = complex(AK1R, AK1I) * complex(STR, STI) + AK1R = real(tmp) + AK1I = imag(tmp) +Twenty: + FDN = 0.0e0 + if DNU2 > RTR1 { + FDN = DNU2 * DNU2 + } + EZR = ZR * 8.0e0 + EZI = ZI * 8.0e0 + + // WHEN Z IS IMAGINARY, THE ERROR TEST MUST BE MADE RELATIVE TO THE + // FIRST RECIPROCAL POWER SINCE THIS IS THE LEADING TERM OF THE + // EXPANSION FOR THE IMAGINARY PART. 
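+	// THE SUM BEING FORMED IS THE LARGE-|Z| EXPANSION
+	// I(FNU,Z) ~ EXP(Z)/SQRT(2*PI*Z) * SUM CK/(8*Z)**K, PLUS, THROUGH P1,
+	// THE EXPONENTIALLY SMALL REFLECTED SERIES; HENCE THE 8*AZ FACTORS BELOW.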
+ AEZ = 8.0e0 * AZ + S = TOL / AEZ + JL = int(float32(RL+RL)) + 2 + P1R = ZEROR + P1I = ZEROI + if ZI == 0.0e0 { + goto Thirty + } + + // CALCULATE EXP(PI*(0.5+FNU+N-IL)*I) TO MINIMIZE LOSSES OF + // SIGNIFICANCE WHEN FNU OR N IS LARGE + INU = int(float32(FNU)) + ARG = (FNU - float64(float32(INU))) * PI + INU = INU + N - IL + //sin, cos = math.Sincos(ARG) + AK = -math.Sin(ARG) + BK = math.Cos(ARG) + if ZI < 0.0e0 { + BK = -BK + } + P1R = AK + P1I = BK + if INU%2 == 0 { + goto Thirty + } + P1R = -P1R + P1I = -P1I +Thirty: + for K = 1; K <= IL; K++ { + SQK = FDN - 1.0e0 + ATOL = S * math.Abs(SQK) + SGN = 1.0e0 + CS1R = CONER + CS1I = CONEI + CS2R = CONER + CS2I = CONEI + CKR = CONER + CKI = CONEI + AK = 0.0e0 + AA = 1.0e0 + BB = AEZ + DKR = EZR + DKI = EZI + // TODO(btracey): This loop is executed tens of thousands of times. Why? + // is that really necessary? + for J = 1; J <= JL; J++ { + tmp = complex(CKR, CKI) / complex(DKR, DKI) + STR = real(tmp) + STI = imag(tmp) + CKR = STR * SQK + CKI = STI * SQK + CS2R = CS2R + CKR + CS2I = CS2I + CKI + SGN = -SGN + CS1R = CS1R + CKR*SGN + CS1I = CS1I + CKI*SGN + DKR = DKR + EZR + DKI = DKI + EZI + AA = AA * math.Abs(SQK) / BB + BB = BB + AEZ + AK = AK + 8.0e0 + SQK = SQK - AK + if AA <= ATOL { + goto Fifty + } + } + goto OneTen + Fifty: + S2R = CS1R + S2I = CS1I + if ZR+ZR >= ELIM { + goto Sixty + } + TZR = ZR + ZR + TZI = ZI + ZI + tmp = cmplx.Exp(complex(-TZR, -TZI)) + STR = real(tmp) + STI = imag(tmp) + tmp = complex(STR, STI) * complex(P1R, P1I) + STR = real(tmp) + STI = imag(tmp) + tmp = complex(STR, STI) * complex(CS2R, CS2I) + STR = real(tmp) + STI = imag(tmp) + S2R = S2R + STR + S2I = S2I + STI + Sixty: + FDN = FDN + 8.0e0*DFNU + 4.0e0 + P1R = -P1R + P1I = -P1I + M = N - IL + K + YR[M] = S2R*AK1R - S2I*AK1I + YI[M] = S2R*AK1I + S2I*AK1R + } + if N <= 2 { + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, RL, TOL, ELIM, ALIM + } + NN = N + K = NN - 2 + AK = float64(float32(K)) + STR = ZR * RAZ + STI = -ZI * RAZ + RZR = (STR + STR) * RAZ + RZI = (STI + STI) * RAZ + IB = 3 + for I = IB; I <= NN; I++ { + YR[K] = (AK+FNU)*(RZR*YR[K+1]-RZI*YI[K+1]) + YR[K+2] + YI[K] = (AK+FNU)*(RZR*YI[K+1]+RZI*YR[K+1]) + YI[K+2] + AK = AK - 1.0e0 + K = K - 1 + } + if KODED == 0 { + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, RL, TOL, ELIM, ALIM + } + tmp = cmplx.Exp(complex(CZR, CZI)) + CKR = real(tmp) + CKI = imag(tmp) + for I = 1; I <= NN; I++ { + STR = YR[I]*CKR - YI[I]*CKI + YI[I] = YR[I]*CKI + YI[I]*CKR + YR[I] = STR + } + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, RL, TOL, ELIM, ALIM +OneHundred: + NZ = -1 + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, RL, TOL, ELIM, ALIM +OneTen: + NZ = -2 + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, RL, TOL, ELIM, ALIM +} + +// ZMLRI COMPUTES THE I BESSEL FUNCTION FOR RE(Z)>=0.0 BY THE +// MILLER ALGORITHM NORMALIZED BY A NEUMANN SERIES. 
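+//
+// The backward recurrence starts at an index chosen large enough that the
+// relative truncation error is below TOL; the unnormalized values are then
+// divided by a gamma-weighted sum (computed with dgamln) that enforces the
+// Neumann-series normalization.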
+func Zmlri(ZR, ZI, FNU float64, KODE, N int, YR, YI []float64, TOL float64) ( + ZRout, ZIout, FNUout float64, KODEout, Nout int, YRout, YIout []float64, NZ int, TOLout float64) { + var ACK, AK, AP, AT, AZ, BK, CKI, CKR, CNORMI, + CNORMR, CONEI, CONER, FKAP, FKK, FLAM, FNF, PTI, PTR, P1I, + P1R, P2I, P2R, RAZ, RHO, RHO2, RZI, RZR, SCLE, STI, STR, SUMI, + SUMR, TFNF, TST, ZEROI, ZEROR float64 + var I, IAZ, IDUM, IFNU, INU, ITIME, K, KK, KM, M int + var tmp complex128 + ZEROR = 0 + ZEROI = 0 + CONER = 1 + CONEI = 0 + + SCLE = dmach[1] / TOL + AZ = cmplx.Abs(complex(ZR, ZI)) + IAZ = int(float32(AZ)) + IFNU = int(float32(FNU)) + INU = IFNU + N - 1 + AT = float64(float32(IAZ)) + 1.0e0 + RAZ = 1.0e0 / AZ + STR = ZR * RAZ + STI = -ZI * RAZ + CKR = STR * AT * RAZ + CKI = STI * AT * RAZ + RZR = (STR + STR) * RAZ + RZI = (STI + STI) * RAZ + P1R = ZEROR + P1I = ZEROI + P2R = CONER + P2I = CONEI + ACK = (AT + 1.0e0) * RAZ + RHO = ACK + math.Sqrt(ACK*ACK-1.0e0) + RHO2 = RHO * RHO + TST = (RHO2 + RHO2) / ((RHO2 - 1.0e0) * (RHO - 1.0e0)) + TST = TST / TOL + + // COMPUTE RELATIVE TRUNCATION ERROR INDEX FOR SERIES. + //fmt.Println("before loop", P2R, P2I, CKR, CKI, RZR, RZI, TST, AK) + AK = AT + for I = 1; I <= 80; I++ { + PTR = P2R + PTI = P2I + P2R = P1R - (CKR*PTR - CKI*PTI) + P2I = P1I - (CKI*PTR + CKR*PTI) + P1R = PTR + P1I = PTI + CKR = CKR + RZR + CKI = CKI + RZI + AP = cmplx.Abs(complex(P2R, P2I)) + if AP > TST*AK*AK { + goto Twenty + } + AK = AK + 1.0e0 + } + goto OneTen +Twenty: + I = I + 1 + K = 0 + if INU < IAZ { + goto Forty + } + // COMPUTE RELATIVE TRUNCATION ERROR FOR RATIOS. + P1R = ZEROR + P1I = ZEROI + P2R = CONER + P2I = CONEI + AT = float64(float32(INU)) + 1.0e0 + STR = ZR * RAZ + STI = -ZI * RAZ + CKR = STR * AT * RAZ + CKI = STI * AT * RAZ + ACK = AT * RAZ + TST = math.Sqrt(ACK / TOL) + ITIME = 1 + for K = 1; K <= 80; K++ { + PTR = P2R + PTI = P2I + P2R = P1R - (CKR*PTR - CKI*PTI) + P2I = P1I - (CKR*PTI + CKI*PTR) + P1R = PTR + P1I = PTI + CKR = CKR + RZR + CKI = CKI + RZI + AP = cmplx.Abs(complex(P2R, P2I)) + if AP < TST { + continue + } + if ITIME == 2 { + goto Forty + } + ACK = cmplx.Abs(complex(CKR, CKI)) + FLAM = ACK + math.Sqrt(ACK*ACK-1.0e0) + FKAP = AP / cmplx.Abs(complex(P1R, P1I)) + RHO = math.Min(FLAM, FKAP) + TST = TST * math.Sqrt(RHO/(RHO*RHO-1.0e0)) + ITIME = 2 + } + goto OneTen +Forty: + // BACKWARD RECURRENCE AND SUM NORMALIZING RELATION. + K = K + 1 + KK = max(I+IAZ, K+INU) + FKK = float64(float32(KK)) + P1R = ZEROR + P1I = ZEROI + + // SCALE P2 AND SUM BY SCLE. 
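+	// SCLE = dmach[1]/TOL starts the recurrence just above the underflow
+	// limit; the factor cancels when CNORM divides through by SUM+P2 below.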
+ P2R = SCLE + P2I = ZEROI + FNF = FNU - float64(float32(IFNU)) + TFNF = FNF + FNF + BK = dgamln(FKK+TFNF+1.0e0, IDUM) - dgamln(FKK+1.0e0, IDUM) - dgamln(TFNF+1.0e0, IDUM) + BK = math.Exp(BK) + SUMR = ZEROR + SUMI = ZEROI + KM = KK - INU + for I = 1; I <= KM; I++ { + PTR = P2R + PTI = P2I + P2R = P1R + (FKK+FNF)*(RZR*PTR-RZI*PTI) + P2I = P1I + (FKK+FNF)*(RZI*PTR+RZR*PTI) + P1R = PTR + P1I = PTI + AK = 1.0e0 - TFNF/(FKK+TFNF) + ACK = BK * AK + SUMR = SUMR + (ACK+BK)*P1R + SUMI = SUMI + (ACK+BK)*P1I + BK = ACK + FKK = FKK - 1.0e0 + } + YR[N] = P2R + YI[N] = P2I + if N == 1 { + goto Seventy + } + for I = 2; I <= N; I++ { + PTR = P2R + PTI = P2I + P2R = P1R + (FKK+FNF)*(RZR*PTR-RZI*PTI) + P2I = P1I + (FKK+FNF)*(RZI*PTR+RZR*PTI) + P1R = PTR + P1I = PTI + AK = 1.0e0 - TFNF/(FKK+TFNF) + ACK = BK * AK + SUMR = SUMR + (ACK+BK)*P1R + SUMI = SUMI + (ACK+BK)*P1I + BK = ACK + FKK = FKK - 1.0e0 + M = N - I + 1 + YR[M] = P2R + YI[M] = P2I + } +Seventy: + if IFNU <= 0 { + goto Ninety + } + for I = 1; I <= IFNU; I++ { + PTR = P2R + PTI = P2I + P2R = P1R + (FKK+FNF)*(RZR*PTR-RZI*PTI) + P2I = P1I + (FKK+FNF)*(RZR*PTI+RZI*PTR) + P1R = PTR + P1I = PTI + AK = 1.0e0 - TFNF/(FKK+TFNF) + ACK = BK * AK + SUMR = SUMR + (ACK+BK)*P1R + SUMI = SUMI + (ACK+BK)*P1I + BK = ACK + FKK = FKK - 1.0e0 + } +Ninety: + PTR = ZR + PTI = ZI + if KODE == 2 { + PTR = ZEROR + } + tmp = cmplx.Log(complex(RZR, RZI)) + STR = real(tmp) + STI = imag(tmp) + P1R = -FNF*STR + PTR + P1I = -FNF*STI + PTI + AP = dgamln(1.0e0+FNF, IDUM) + PTR = P1R - AP + PTI = P1I + + // THE DIVISION CEXP(PT)/(SUM+P2) IS ALTERED TO AVOID OVERFLOW + // IN THE DENOMINATOR BY SQUARING LARGE QUANTITIES. + P2R = P2R + SUMR + P2I = P2I + SUMI + AP = cmplx.Abs(complex(P2R, P2I)) + P1R = 1.0e0 / AP + tmp = cmplx.Exp(complex(PTR, PTI)) + STR = real(tmp) + STI = imag(tmp) + CKR = STR * P1R + CKI = STI * P1R + PTR = P2R * P1R + PTI = -P2I * P1R + tmp = complex(CKR, CKI) * complex(PTR, PTI) + CNORMR = real(tmp) + CNORMI = imag(tmp) + for I = 1; I <= N; I++ { + STR = YR[I]*CNORMR - YI[I]*CNORMI + YI[I] = YR[I]*CNORMI + YI[I]*CNORMR + YR[I] = STR + } + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL +OneTen: + NZ = -2 + return ZR, ZI, FNU, KODE, N, YR, YI, NZ, TOL +} + +// Zseri computes the I bessel function for real(z) >= 0 by means of the power +// series for large |z| in the region |z| <= 2*sqrt(fnu+1). +// +// nz = 0 is a normal return. nz > 0 means that the last nz components were set +// to zero due to underflow. nz < 0 means that underflow occurred, but the +// condition |z| <= 2*sqrt(fnu+1) was violated and the computation must be +// completed in another routine with n -= abs(nz). +func Zseri(z complex128, fnu float64, kode, n int, y []complex128, tol, elim, alim float64) (nz int) { + // TODO(btracey): The original fortran line is "ARM = 1.0D+3*D1MACH(1)". Evidently, in Fortran + // this is interpreted as one to the power of +3*D1MACH(1). While it is possible + // this was intentional, it seems unlikely. + arm := 1000 * dmach[1] + az := cmplx.Abs(z) + if az < arm { + for i := 0; i < n; i++ { + y[i] = 0 + } + if fnu == 0 { + y[0] = 1 + n-- + } + if az == 0 { + return 0 + } + return n + } + hz := 0.5 * z + var cz complex128 + var acz float64 + if az > math.Sqrt(arm) { + cz = hz * hz + acz = cmplx.Abs(cz) + } + NN := n + ck := cmplx.Log(hz) + var ak1 complex128 + for { + dfnu := fnu + float64(NN-1) + // Underflow test. 
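+		// real(ak1) approximates the log of the leading series term,
+		// log((z/2)**dfnu / Γ(dfnu+1)) (minus real(z) on kode == 2); an
+		// order whose leading term would underflow past -elim is dropped
+		// and counted in nz.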
+		ak1 = ck * complex(dfnu, 0)
+		ak := dgamln(dfnu+1, 0)
+		ak1 -= complex(ak, 0)
+		if kode == 2 {
+			ak1 -= complex(real(z), 0)
+		}
+		if real(ak1) > -elim {
+			break
+		}
+		nz++
+		y[NN-1] = 0
+		if acz > dfnu {
+			// Return with nz < 0 if abs(z*z/4) > fnu+n-nz-1; complete the
+			// calculation in cbinu with n = n - abs(nz).
+			nz *= -1
+			return nz
+		}
+		NN--
+		if NN == 0 {
+			return nz
+		}
+	}
+	crscr := 1.0
+	var flag int
+	var scale float64
+	aa := real(ak1)
+	if aa <= -alim {
+		flag = 1
+		crscr = tol
+		scale = arm / tol
+		aa -= math.Log(tol)
+	}
+	var w [2]complex128
+	for {
+		coef := cmplx.Exp(complex(aa, imag(ak1)))
+		atol := tol * acz / (fnu + float64(NN))
+		for i := 0; i < min(2, NN); i++ {
+			FNUP := fnu + float64(NN-i)
+			s1 := 1 + 0i
+			if acz >= tol*FNUP {
+				ak2 := 1 + 0i
+				ak := FNUP + 2
+				S := FNUP
+				scl := 2.0
+				first := true
+				for first || scl > atol {
+					ak2 = ak2 * cz * complex(1/S, 0)
+					scl *= acz / S
+					s1 += ak2
+					S += ak
+					ak += 2
+					first = false
+				}
+			}
+			s2 := s1 * coef
+			w[i] = s2
+			if flag == 1 {
+				if Zuchk(s2, scale, tol) != 0 {
+					var full bool
+					var dfnu float64
+					// This code is similar to the code that exists above. The
+					// code copying is here because the original Fortran used
+					// a goto to solve the loop-and-a-half problem. Removing the
+					// goto makes the behavior of the function and variable scoping
+					// much clearer, but requires copying this code due to Go's
+					// goto rules.
+					for {
+						if full {
+							dfnu = fnu + float64(NN-1)
+							// Underflow test.
+							ak1 = ck * complex(dfnu, 0)
+							ak1 -= complex(dgamln(dfnu+1, 0), 0)
+							if kode == 2 {
+								ak1 -= complex(real(z), 0)
+							}
+							if real(ak1) > -elim {
+								break
+							}
+						} else {
+							full = true
+						}
+						nz++
+						y[NN-1] = 0
+						if acz > dfnu {
+							// Return with nz < 0 if abs(z*z/4) > fnu+n-nz-1;
+							// complete the calculation in cbinu with
+							// n = n - abs(nz).
+							nz *= -1
+							return nz
+						}
+						NN--
+						if NN == 0 {
+							return nz
+						}
+					}
+					continue
+				}
+			}
+			y[NN-i-1] = s2 * complex(crscr, 0)
+			coef /= hz
+			coef *= complex(FNUP-1, 0)
+		}
+		break
+	}
+	if NN <= 2 {
+		return nz
+	}
+	rz := complex(2*real(z)/(az*az), -2*imag(z)/(az*az))
+	if flag == 0 {
+		for i := NN - 3; i >= 0; i-- {
+			y[i] = complex(float64(i+1)+fnu, 0)*rz*y[i+1] + y[i+2]
+		}
+		return nz
+	}
+
+	// exp(-alim) = exp(-elim)/tol is approximately one digit of precision
+	// above the underflow limit, which equals scale = 1e3 * dmach[1] / tol.
+	s1 := w[0]
+	s2 := w[1]
+	for K := NN - 3; K >= 0; K-- {
+		s1, s2 = s2, s1+complex(float64(K+1)+fnu, 0)*(rz*s2)
+		ck := s2 * complex(crscr, 0)
+		y[K] = ck
+		if cmplx.Abs(ck) > scale {
+			for ; K >= 0; K-- {
+				y[K] = complex(float64(K+1)+fnu, 0)*rz*y[K+1] + y[K+2]
+			}
+			return nz
+		}
+	}
+	return nz
+}
+
+// Zs1s2 tests for a possible underflow resulting from the addition of the I and
+// K functions in the analytic continuation formula where s1 == K function and
+// s2 == I function.
+//
+// When kode == 1, the I and K functions are different orders of magnitude.
+//
+// When kode == 2, they may both be of the same order of magnitude, but the maximum
+// must be at least one precision above the underflow limit.
+func Zs1s2(zr, s1, s2 complex128, scale, lim float64, iuf int) (s1o, s2o complex128, nz, iufo int) {
+	if s1 == 0 || math.Log(cmplx.Abs(s1))-2*real(zr) < -lim {
+		if cmplx.Abs(s2) > scale {
+			return 0, s2, 0, iuf
+		}
+		return 0, 0, 1, 0
+	}
+	// TODO(btracey): Written like this for numerical rounding reasons.
+	// Fix once we're sure other changes are correct.
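+	// Computing s1*exp(-2*zr) through the log form below loses less accuracy
+	// near the underflow limit than a direct multiply would, which is the
+	// rounding concern the TODO above refers to.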
+ s1 = cmplx.Exp(cmplx.Log(s1) - zr - zr) + if math.Max(cmplx.Abs(s1), cmplx.Abs(s2)) > scale { + return s1, s2, 0, iuf + 1 + } + return 0, 0, 1, 0 +} + +func dgamln(z float64, ierr int) float64 { + //return amoslib.DgamlnFort(z) + // Go implementation. + if z < 0 { + return 0 + } + a2, _ := math.Lgamma(z) + return a2 +} diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/amos/doc.go b/vendor/gonum.org/v1/gonum/mathext/internal/amos/doc.go new file mode 100644 index 0000000000..32f96c1474 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/amos/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package amos implements functions originally in the Netlib code by Donald Amos. +package amos // import "gonum.org/v1/gonum/mathext/internal/amos" diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/amos/staticcheck.conf b/vendor/gonum.org/v1/gonum/mathext/internal/amos/staticcheck.conf new file mode 100644 index 0000000000..e7e254ff3f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/amos/staticcheck.conf @@ -0,0 +1 @@ +checks = [] diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/cephes.go b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/cephes.go new file mode 100644 index 0000000000..20cac067ea --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/cephes.go @@ -0,0 +1,28 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cephes + +import "math" + +/* +Additional copyright information: + +Code in this package is adapted from the Cephes library (http://www.netlib.org/cephes/). +There is no explicit licence on Netlib, but the author has agreed to a BSD release. +See https://github.com/deepmind/torch-cephes/blob/master/LICENSE.txt and +https://lists.debian.org/debian-legal/2004/12/msg00295.html +*/ + +const ( + paramOutOfBounds = "cephes: parameter out of bounds" + errParamFunctionSingularity = "cephes: function singularity" +) + +const ( + machEp = 1.0 / (1 << 53) + maxLog = 1024 * math.Ln2 + minLog = -1075 * math.Ln2 + maxIter = 2000 +) diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/doc.go b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/doc.go new file mode 100644 index 0000000000..086c46948b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cephes implements functions originally in the Netlib code by Stephen Mosher. +package cephes // import "gonum.org/v1/gonum/mathext/internal/cephes" diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/igam.go b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/igam.go new file mode 100644 index 0000000000..4bc0bd1dcb --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/igam.go @@ -0,0 +1,320 @@ +// Derived from SciPy's special/cephes/igam.c and special/cephes/igam.h +// https://github.com/scipy/scipy/blob/master/scipy/special/cephes/igam.c +// https://github.com/scipy/scipy/blob/master/scipy/special/cephes/igam.h +// Made freely available by Stephen L. Moshier without support or guarantee. 
+ +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Copyright ©1985, ©1987 by Stephen L. Moshier +// Portions Copyright ©2016 The Gonum Authors. All rights reserved. + +package cephes + +import "math" + +const ( + igamDimK = 25 + igamDimN = 25 + igam = 1 + igamC = 0 + igamSmall = 20 + igamLarge = 200 + igamSmallRatio = 0.3 + igamLargeRatio = 4.5 +) + +var igamCoefs = [igamDimK][igamDimN]float64{ + {-3.3333333333333333e-1, 8.3333333333333333e-2, -1.4814814814814815e-2, 1.1574074074074074e-3, 3.527336860670194e-4, -1.7875514403292181e-4, 3.9192631785224378e-5, -2.1854485106799922e-6, -1.85406221071516e-6, 8.296711340953086e-7, -1.7665952736826079e-7, 6.7078535434014986e-9, 1.0261809784240308e-8, -4.3820360184533532e-9, 9.1476995822367902e-10, -2.551419399494625e-11, -5.8307721325504251e-11, 2.4361948020667416e-11, -5.0276692801141756e-12, 1.1004392031956135e-13, 3.3717632624009854e-13, -1.3923887224181621e-13, 2.8534893807047443e-14, -5.1391118342425726e-16, -1.9752288294349443e-15}, + {-1.8518518518518519e-3, -3.4722222222222222e-3, 2.6455026455026455e-3, -9.9022633744855967e-4, 2.0576131687242798e-4, -4.0187757201646091e-7, -1.8098550334489978e-5, 7.6491609160811101e-6, -1.6120900894563446e-6, 4.6471278028074343e-9, 1.378633446915721e-7, -5.752545603517705e-8, 1.1951628599778147e-8, -1.7543241719747648e-11, -1.0091543710600413e-9, 4.1627929918425826e-10, -8.5639070264929806e-11, 6.0672151016047586e-14, 7.1624989648114854e-12, -2.9331866437714371e-12, 5.9966963656836887e-13, -2.1671786527323314e-16, -4.9783399723692616e-14, 2.0291628823713425e-14, -4.13125571381061e-15}, + {4.1335978835978836e-3, -2.6813271604938272e-3, 7.7160493827160494e-4, 2.0093878600823045e-6, -1.0736653226365161e-4, 5.2923448829120125e-5, -1.2760635188618728e-5, 3.4235787340961381e-8, 1.3721957309062933e-6, -6.298992138380055e-7, 1.4280614206064242e-7, -2.0477098421990866e-10, -1.4092529910867521e-8, 6.228974084922022e-9, -1.3670488396617113e-9, 9.4283561590146782e-13, 1.2872252400089318e-10, -5.5645956134363321e-11, 1.1975935546366981e-11, -4.1689782251838635e-15, -1.0940640427884594e-12, 4.6622399463901357e-13, -9.905105763906906e-14, 1.8931876768373515e-17, 8.8592218725911273e-15}, + {6.4943415637860082e-4, 2.2947209362139918e-4, -4.6918949439525571e-4, 2.6772063206283885e-4, -7.5618016718839764e-5, -2.3965051138672967e-7, 1.1082654115347302e-5, -5.6749528269915966e-6, 1.4230900732435884e-6, -2.7861080291528142e-11, -1.6958404091930277e-7, 8.0994649053880824e-8, -1.9111168485973654e-8, 2.3928620439808118e-12, 2.0620131815488798e-9, -9.4604966618551322e-10, 2.1541049775774908e-10, -1.388823336813903e-14, -2.1894761681963939e-11, 9.7909989511716851e-12, -2.1782191880180962e-12, 6.2088195734079014e-17, 2.126978363279737e-13, -9.3446887915174333e-14, 2.0453671226782849e-14}, + {-8.618882909167117e-4, 7.8403922172006663e-4, -2.9907248030319018e-4, -1.4638452578843418e-6, 6.6414982154651222e-5, -3.9683650471794347e-5, 1.1375726970678419e-5, 2.5074972262375328e-10, -1.6954149536558306e-6, 8.9075075322053097e-7, -2.2929348340008049e-7, 2.956794137544049e-11, 2.8865829742708784e-8, -1.4189739437803219e-8, 3.4463580499464897e-9, -2.3024517174528067e-13, -3.9409233028046405e-10, 1.8602338968504502e-10, -4.356323005056618e-11, 1.2786001016296231e-15, 4.6792750266579195e-12, -2.1492464706134829e-12, 4.9088156148096522e-13, -6.3385914848915603e-18, -5.0453320690800944e-14}, + {-3.3679855336635815e-4, -6.9728137583658578e-5, 2.7727532449593921e-4, -1.9932570516188848e-4, 
6.7977804779372078e-5, 1.419062920643967e-7, -1.3594048189768693e-5, 8.0184702563342015e-6, -2.2914811765080952e-6, -3.252473551298454e-10, 3.4652846491085265e-7, -1.8447187191171343e-7, 4.8240967037894181e-8, -1.7989466721743515e-14, -6.3061945000135234e-9, 3.1624176287745679e-9, -7.8409242536974293e-10, 5.1926791652540407e-15, 9.3589442423067836e-11, -4.5134262161632782e-11, 1.0799129993116827e-11, -3.661886712685252e-17, -1.210902069055155e-12, 5.6807435849905643e-13, -1.3249659916340829e-13}, + {5.3130793646399222e-4, -5.9216643735369388e-4, 2.7087820967180448e-4, 7.9023532326603279e-7, -8.1539693675619688e-5, 5.6116827531062497e-5, -1.8329116582843376e-5, -3.0796134506033048e-9, 3.4651553688036091e-6, -2.0291327396058604e-6, 5.7887928631490037e-7, 2.338630673826657e-13, -8.8286007463304835e-8, 4.7435958880408128e-8, -1.2545415020710382e-8, 8.6496488580102925e-14, 1.6846058979264063e-9, -8.5754928235775947e-10, 2.1598224929232125e-10, -7.6132305204761539e-16, -2.6639822008536144e-11, 1.3065700536611057e-11, -3.1799163902367977e-12, 4.7109761213674315e-18, 3.6902800842763467e-13}, + {3.4436760689237767e-4, 5.1717909082605922e-5, -3.3493161081142236e-4, 2.812695154763237e-4, -1.0976582244684731e-4, -1.2741009095484485e-7, 2.7744451511563644e-5, -1.8263488805711333e-5, 5.7876949497350524e-6, 4.9387589339362704e-10, -1.0595367014026043e-6, 6.1667143761104075e-7, -1.7562973359060462e-7, -1.2974473287015439e-12, 2.695423606288966e-8, -1.4578352908731271e-8, 3.887645959386175e-9, -3.8810022510194121e-17, -5.3279941738772867e-10, 2.7437977643314845e-10, -6.9957960920705679e-11, 2.5899863874868481e-17, 8.8566890996696381e-12, -4.403168815871311e-12, 1.0865561947091654e-12}, + {-6.5262391859530942e-4, 8.3949872067208728e-4, -4.3829709854172101e-4, -6.969091458420552e-7, 1.6644846642067548e-4, -1.2783517679769219e-4, 4.6299532636913043e-5, 4.5579098679227077e-9, -1.0595271125805195e-5, 6.7833429048651666e-6, -2.1075476666258804e-6, -1.7213731432817145e-11, 3.7735877416110979e-7, -2.1867506700122867e-7, 6.2202288040189269e-8, 6.5977038267330006e-16, -9.5903864974256858e-9, 5.2132144922808078e-9, -1.3991589583935709e-9, 5.382058999060575e-16, 1.9484714275467745e-10, -1.0127287556389682e-10, 2.6077347197254926e-11, -5.0904186999932993e-18, -3.3721464474854592e-12}, + {-5.9676129019274625e-4, -7.2048954160200106e-5, 6.7823088376673284e-4, -6.4014752602627585e-4, 2.7750107634328704e-4, 1.8197008380465151e-7, -8.4795071170685032e-5, 6.105192082501531e-5, -2.1073920183404862e-5, -8.8585890141255994e-10, 4.5284535953805377e-6, -2.8427815022504408e-6, 8.7082341778646412e-7, 3.6886101871706965e-12, -1.5344695190702061e-7, 8.862466778790695e-8, -2.5184812301826817e-8, -1.0225912098215092e-14, 3.8969470758154777e-9, -2.1267304792235635e-9, 5.7370135528051385e-10, -1.887749850169741e-19, -8.0931538694657866e-11, 4.2382723283449199e-11, -1.1002224534207726e-11}, + {1.3324454494800656e-3, -1.9144384985654775e-3, 1.1089369134596637e-3, 9.932404122642299e-7, -5.0874501293093199e-4, 4.2735056665392884e-4, -1.6858853767910799e-4, -8.1301893922784998e-9, 4.5284402370562147e-5, -3.127053674781734e-5, 1.044986828530338e-5, 4.8435226265680926e-11, -2.1482565873456258e-6, 1.329369701097492e-6, -4.0295693092101029e-7, -1.7567877666323291e-13, 7.0145043163668257e-8, -4.040787734999483e-8, 1.1474026743371963e-8, 3.9642746853563325e-18, -1.7804938269892714e-9, 9.7480262548731646e-10, -2.6405338676507616e-10, 5.794875163403742e-18, 3.7647749553543836e-11}, + {1.579727660730835e-3, 1.6251626278391582e-4, 
-2.0633421035543276e-3, 2.1389686185689098e-3, -1.0108559391263003e-3, -3.9912705529919201e-7, 3.6235025084764691e-4, -2.8143901463712154e-4, 1.0449513336495887e-4, 2.1211418491830297e-9, -2.5779417251947842e-5, 1.7281818956040463e-5, -5.6413773872904282e-6, -1.1024320105776174e-11, 1.1223224418895175e-6, -6.8693396379526735e-7, 2.0653236975414887e-7, 4.6714772409838506e-14, -3.5609886164949055e-8, 2.0470855345905963e-8, -5.8091738633283358e-9, -1.332821287582869e-16, 9.0354604391335133e-10, -4.9598782517330834e-10, 1.3481607129399749e-10}, + {-4.0725121195140166e-3, 6.4033628338080698e-3, -4.0410161081676618e-3, -2.183732802866233e-6, 2.1740441801254639e-3, -1.9700440518418892e-3, 8.3595469747962458e-4, 1.9445447567109655e-8, -2.5779387120421696e-4, 1.9009987368139304e-4, -6.7696499937438965e-5, -1.4440629666426572e-10, 1.5712512518742269e-5, -1.0304008744776893e-5, 3.304517767401387e-6, 7.9829760242325709e-13, -6.4097794149313004e-7, 3.8894624761300056e-7, -1.1618347644948869e-7, -2.816808630596451e-15, 1.9878012911297093e-8, -1.1407719956357511e-8, 3.2355857064185555e-9, 4.1759468293455945e-20, -5.0423112718105824e-10}, + {-5.9475779383993003e-3, -5.4016476789260452e-4, 8.7910413550767898e-3, -9.8576315587856125e-3, 5.0134695031021538e-3, 1.2807521786221875e-6, -2.0626019342754683e-3, 1.7109128573523058e-3, -6.7695312714133799e-4, -6.9011545676562133e-9, 1.8855128143995902e-4, -1.3395215663491969e-4, 4.6263183033528039e-5, 4.0034230613321351e-11, -1.0255652921494033e-5, 6.612086372797651e-6, -2.0913022027253008e-6, -2.0951775649603837e-13, 3.9756029041993247e-7, -2.3956211978815887e-7, 7.1182883382145864e-8, 8.925574873053455e-16, -1.2101547235064676e-8, 6.9350618248334386e-9, -1.9661464453856102e-9}, + {1.7402027787522711e-2, -2.9527880945699121e-2, 2.0045875571402799e-2, 7.0289515966903407e-6, -1.2375421071343148e-2, 1.1976293444235254e-2, -5.4156038466518525e-3, -6.3290893396418616e-8, 1.8855118129005065e-3, -1.473473274825001e-3, 5.5515810097708387e-4, 5.2406834412550662e-10, -1.4357913535784836e-4, 9.9181293224943297e-5, -3.3460834749478311e-5, -3.5755837291098993e-12, 7.1560851960630076e-6, -4.5516802628155526e-6, 1.4236576649271475e-6, 1.8803149082089664e-14, -2.6623403898929211e-7, 1.5950642189595716e-7, -4.7187514673841102e-8, -6.5107872958755177e-17, 7.9795091026746235e-9}, + {3.0249124160905891e-2, 2.4817436002649977e-3, -4.9939134373457022e-2, 5.9915643009307869e-2, -3.2483207601623391e-2, -5.7212968652103441e-6, 1.5085251778569354e-2, -1.3261324005088445e-2, 5.5515262632426148e-3, 3.0263182257030016e-8, -1.7229548406756723e-3, 1.2893570099929637e-3, -4.6845138348319876e-4, -1.830259937893045e-10, 1.1449739014822654e-4, -7.7378565221244477e-5, 2.5625836246985201e-5, 1.0766165333192814e-12, -5.3246809282422621e-6, 3.349634863064464e-6, -1.0381253128684018e-6, -5.608909920621128e-15, 1.9150821930676591e-7, -1.1418365800203486e-7, 3.3654425209171788e-8}, + {-9.9051020880159045e-2, 1.7954011706123486e-1, -1.2989606383463778e-1, -3.1478872752284357e-5, 9.0510635276848131e-2, -9.2828824411184397e-2, 4.4412112839877808e-2, 2.7779236316835888e-7, -1.7229543805449697e-2, 1.4182925050891573e-2, -5.6214161633747336e-3, -2.39598509186381e-9, 1.6029634366079908e-3, -1.1606784674435773e-3, 4.1001337768153873e-4, 1.8365800754090661e-11, -9.5844256563655903e-5, 6.3643062337764708e-5, -2.076250624489065e-5, -1.1806020912804483e-13, 4.2131808239120649e-6, -2.6262241337012467e-6, 8.0770620494930662e-7, 6.0125912123632725e-16, -1.4729737374018841e-7}, + {-1.9994542198219728e-1, 
-1.5056113040026424e-2, 3.6470239469348489e-1, -4.6435192311733545e-1, 2.6640934719197893e-1, 3.4038266027147191e-5, -1.3784338709329624e-1, 1.276467178337056e-1, -5.6213828755200985e-2, -1.753150885483011e-7, 1.9235592956768113e-2, -1.5088821281095315e-2, 5.7401854451350123e-3, 1.0622382710310225e-9, -1.5335082692563998e-3, 1.0819320643228214e-3, -3.7372510193945659e-4, -6.6170909729031985e-12, 8.4263617380909628e-5, -5.5150706827483479e-5, 1.7769536448348069e-5, 3.8827923210205533e-14, -3.53513697488768e-6, 2.1865832130045269e-6, -6.6812849447625594e-7}, + {7.2438608504029431e-1, -1.3918010932653375, 1.0654143352413968, 1.876173868950258e-4, -8.2705501176152696e-1, 8.9352433347828414e-1, -4.4971003995291339e-1, -1.6107401567546652e-6, 1.9235590165271091e-1, -1.6597702160042609e-1, 6.8882222681814333e-2, 1.3910091724608687e-8, -2.146911561508663e-2, 1.6228980898865892e-2, -5.9796016172584256e-3, -1.1287469112826745e-10, 1.5167451119784857e-3, -1.0478634293553899e-3, 3.5539072889126421e-4, 8.1704322111801517e-13, -7.7773013442452395e-5, 5.0291413897007722e-5, -1.6035083867000518e-5, 1.2469354315487605e-14, 3.1369106244517615e-6}, + {1.6668949727276811, 1.165462765994632e-1, -3.3288393225018906, 4.4692325482864037, -2.6977693045875807, -2.600667859891061e-4, 1.5389017615694539, -1.4937962361134612, 6.8881964633233148e-1, 1.3077482004552385e-6, -2.5762963325596288e-1, 2.1097676102125449e-1, -8.3714408359219882e-2, -7.7920428881354753e-9, 2.4267923064833599e-2, -1.7813678334552311e-2, 6.3970330388900056e-3, 4.9430807090480523e-11, -1.5554602758465635e-3, 1.0561196919903214e-3, -3.5277184460472902e-4, 9.3002334645022459e-14, 7.5285855026557172e-5, -4.8186515569156351e-5, 1.5227271505597605e-5}, + {-6.6188298861372935, 1.3397985455142589e+1, -1.0789350606845146e+1, -1.4352254537875018e-3, 9.2333694596189809, -1.0456552819547769e+1, 5.5105526029033471, 1.2024439690716742e-5, -2.5762961164755816, 2.3207442745387179, -1.0045728797216284, -1.0207833290021914e-7, 3.3975092171169466e-1, -2.6720517450757468e-1, 1.0235252851562706e-1, 8.4329730484871625e-10, -2.7998284958442595e-2, 2.0066274144976813e-2, -7.0554368915086242e-3, 1.9402238183698188e-12, 1.6562888105449611e-3, -1.1082898580743683e-3, 3.654545161310169e-4, -5.1290032026971794e-11, -7.6340103696869031e-5}, + {-1.7112706061976095e+1, -1.1208044642899116, 3.7131966511885444e+1, -5.2298271025348962e+1, 3.3058589696624618e+1, 2.4791298976200222e-3, -2.061089403411526e+1, 2.088672775145582e+1, -1.0045703956517752e+1, -1.2238783449063012e-5, 4.0770134274221141, -3.473667358470195, 1.4329352617312006, 7.1359914411879712e-8, -4.4797257159115612e-1, 3.4112666080644461e-1, -1.2699786326594923e-1, -2.8953677269081528e-10, 3.3125776278259863e-2, -2.3274087021036101e-2, 8.0399993503648882e-3, -1.177805216235265e-9, -1.8321624891071668e-3, 1.2108282933588665e-3, -3.9479941246822517e-4}, + {7.389033153567425e+1, -1.5680141270402273e+2, 1.322177542759164e+2, 1.3692876877324546e-2, -1.2366496885920151e+2, 1.4620689391062729e+2, -8.0365587724865346e+1, -1.1259851148881298e-4, 4.0770132196179938e+1, -3.8210340013273034e+1, 1.719522294277362e+1, 9.3519707955168356e-7, -6.2716159907747034, 5.1168999071852637, -2.0319658112299095, -4.9507215582761543e-9, 5.9626397294332597e-1, -4.4220765337238094e-1, 1.6079998700166273e-1, -2.4733786203223402e-8, -4.0307574759979762e-2, 2.7849050747097869e-2, -9.4751858992054221e-3, 6.419922235909132e-6, 2.1250180774699461e-3}, + {2.1216837098382522e+2, 1.3107863022633868e+1, -4.9698285932871748e+2, 7.3121595266969204e+2, 
-4.8213821720890847e+2, -2.8817248692894889e-2, 3.2616720302947102e+2, -3.4389340280087117e+2, 1.7195193870816232e+2, 1.4038077378096158e-4, -7.52594195897599e+1, 6.651969984520934e+1, -2.8447519748152462e+1, -7.613702615875391e-7, 9.5402237105304373, -7.5175301113311376, 2.8943997568871961, -4.6612194999538201e-7, -8.0615149598794088e-1, 5.8483006570631029e-1, -2.0845408972964956e-1, 1.4765818959305817e-4, 5.1000433863753019e-2, -3.3066252141883665e-2, 1.5109265210467774e-2}, + {-9.8959643098322368e+2, 2.1925555360905233e+3, -1.9283586782723356e+3, -1.5925738122215253e-1, 1.9569985945919857e+3, -2.4072514765081556e+3, 1.3756149959336496e+3, 1.2920735237496668e-3, -7.525941715948055e+2, 7.3171668742208716e+2, -3.4137023466220065e+2, -9.9857390260608043e-6, 1.3356313181291573e+2, -1.1276295161252794e+2, 4.6310396098204458e+1, -7.9237387133614756e-6, -1.4510726927018646e+1, 1.1111771248100563e+1, -4.1690817945270892, 3.1008219800117808e-3, 1.1220095449981468, -7.6052379926149916e-1, 3.6262236505085254e-1, 2.216867741940747e-1, 4.8683443692930507e-1}, +} + +// Igam computes the incomplete Gamma integral. +// +// Igam(a,x) = (1/ Γ(a)) \int_0^x e^{-t} t^{a-1} dt +// +// The input argument a must be positive and x must be non-negative or Igam +// will panic. +func Igam(a, x float64) float64 { + // The integral is evaluated by either a power series or continued fraction + // expansion, depending on the relative values of a and x. + // Sources: + // [1] "The Digital Library of Mathematical Functions", dlmf.nist.gov + // [2] Maddock et. al., "Incomplete Gamma Functions", + // http://www.boost.org/doc/libs/1_61_0/libs/math/doc/html/math_toolkit/sf_gamma/igamma.html + + // Check zero integration limit first + if x == 0 { + return 0 + } + + if x < 0 || a <= 0 { + panic(paramOutOfBounds) + } + + // Asymptotic regime where a ~ x; see [2]. + absxmaA := math.Abs(x-a) / a + if (igamSmall < a && a < igamLarge && absxmaA < igamSmallRatio) || + (igamLarge < a && absxmaA < igamLargeRatio/math.Sqrt(a)) { + return asymptoticSeries(a, x, igam) + } + + if x > 1 && x > a { + return 1 - IgamC(a, x) + } + + return igamSeries(a, x) +} + +// IgamC computes the complemented incomplete Gamma integral. +// +// IgamC(a,x) = 1 - Igam(a,x) +// = (1/ Γ(a)) \int_0^\infty e^{-t} t^{a-1} dt +// +// The input argument a must be positive and x must be non-negative or +// IgamC will panic. +func IgamC(a, x float64) float64 { + // The integral is evaluated by either a power series or continued fraction + // expansion, depending on the relative values of a and x. + // Sources: + // [1] "The Digital Library of Mathematical Functions", dlmf.nist.gov + // [2] Maddock et. al., "Incomplete Gamma Functions", + // http://www.boost.org/doc/libs/1_61_0/libs/math/doc/html/math_toolkit/sf_gamma/igamma.html + + switch { + case x < 0, a <= 0: + panic(paramOutOfBounds) + case x == 0: + return 1 + case math.IsInf(x, 0): + return 0 + } + + // Asymptotic regime where a ~ x; see [2]. + absxmaA := math.Abs(x-a) / a + if (igamSmall < a && a < igamLarge && absxmaA < igamSmallRatio) || + (igamLarge < a && absxmaA < igamLargeRatio/math.Sqrt(a)) { + return asymptoticSeries(a, x, igamC) + } + + // Everywhere else; see [2]. 
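+	// The branch structure below follows the region map in [2]: a continued
+	// fraction when x is comfortably larger than a, the cancellation-safe
+	// igamCSeries when x is small, and 1 - igamSeries(a, x) otherwise.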
+ if x > 1.1 { + if x < a { + return 1 - igamSeries(a, x) + } + return igamCContinuedFraction(a, x) + } else if x <= 0.5 { + if -0.4/math.Log(x) < a { + return 1 - igamSeries(a, x) + } + return igamCSeries(a, x) + } + + if x*1.1 < a { + return 1 - igamSeries(a, x) + } + return igamCSeries(a, x) +} + +// igamFac computes +// +// x^a * e^{-x} / Γ(a) +// +// corrected from (15) and (16) in [2] by replacing +// +// e^{x - a} +// +// with +// +// e^{a - x} +func igamFac(a, x float64) float64 { + if math.Abs(a-x) > 0.4*math.Abs(a) { + ax := a*math.Log(x) - x - lgam(a) + return math.Exp(ax) + } + + fac := a + lanczosG - 0.5 + res := math.Sqrt(fac/math.Exp(1)) / lanczosSumExpgScaled(a) + + if a < 200 && x < 200 { + res *= math.Exp(a-x) * math.Pow(x/fac, a) + } else { + num := x - a - lanczosG + 0.5 + res *= math.Exp(a*log1pmx(num/fac) + x*(0.5-lanczosG)/fac) + } + + return res +} + +// igamCContinuedFraction computes IgamC using DLMF 8.9.2. +func igamCContinuedFraction(a, x float64) float64 { + ax := igamFac(a, x) + if ax == 0 { + return 0 + } + + // Continued fraction + y := 1 - a + z := x + y + 1 + c := 0.0 + pkm2 := 1.0 + qkm2 := x + pkm1 := x + 1.0 + qkm1 := z * x + ans := pkm1 / qkm1 + + for i := 0; i < maxIter; i++ { + c += 1.0 + y += 1.0 + z += 2.0 + yc := y * c + pk := pkm1*z - pkm2*yc + qk := qkm1*z - qkm2*yc + var t float64 + if qk != 0 { + r := pk / qk + t = math.Abs((ans - r) / r) + ans = r + } else { + t = 1.0 + } + pkm2 = pkm1 + pkm1 = pk + qkm2 = qkm1 + qkm1 = qk + if math.Abs(pk) > big { + pkm2 *= biginv + pkm1 *= biginv + qkm2 *= biginv + qkm1 *= biginv + } + if t <= machEp { + break + } + } + + return ans * ax +} + +// igamSeries computes Igam using DLMF 8.11.4. +func igamSeries(a, x float64) float64 { + ax := igamFac(a, x) + if ax == 0 { + return 0 + } + + // Power series + r := a + c := 1.0 + ans := 1.0 + + for i := 0; i < maxIter; i++ { + r += 1.0 + c *= x / r + ans += c + if c <= machEp*ans { + break + } + } + + return ans * ax / a +} + +// igamCSeries computes IgamC using DLMF 8.7.3. This is related to the series +// in igamSeries but extra care is taken to avoid cancellation. +func igamCSeries(a, x float64) float64 { + fac := 1.0 + sum := 0.0 + + for n := 1; n < maxIter; n++ { + fac *= -x / float64(n) + term := fac / (a + float64(n)) + sum += term + if math.Abs(term) <= machEp*math.Abs(sum) { + break + } + } + + logx := math.Log(x) + term := -expm1(a*logx - lgam1p(a)) + return term - math.Exp(a*logx-lgam(a))*sum +} + +// asymptoticSeries computes Igam/IgamC using DLMF 8.12.3/8.12.4. 
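+//
+// The expansion variable is eta, with eta^2/2 = (x-a)/a - log(x/a) and the
+// sign of x - a; the leading term is erfc(±eta*sqrt(a/2))/2 and igamCoefs
+// supplies the correction series in powers of eta and 1/a.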
+func asymptoticSeries(a, x float64, fun int) float64 { + maxpow := 0 + lambda := x / a + sigma := (x - a) / a + absoldterm := math.MaxFloat64 + etapow := [igamDimN]float64{1} + sum := 0.0 + afac := 1.0 + + var sgn float64 + if fun == igam { + sgn = -1 + } else { + sgn = 1 + } + + var eta float64 + if lambda > 1 { + eta = math.Sqrt(-2 * log1pmx(sigma)) + } else if lambda < 1 { + eta = -math.Sqrt(-2 * log1pmx(sigma)) + } else { + eta = 0 + } + res := 0.5 * math.Erfc(sgn*eta*math.Sqrt(a/2)) + + for k := 0; k < igamDimK; k++ { + ck := igamCoefs[k][0] + for n := 1; n < igamDimN; n++ { + if n > maxpow { + etapow[n] = eta * etapow[n-1] + maxpow++ + } + ckterm := igamCoefs[k][n] * etapow[n] + ck += ckterm + if math.Abs(ckterm) < machEp*math.Abs(ck) { + break + } + } + term := ck * afac + absterm := math.Abs(term) + if absterm > absoldterm { + break + } + sum += term + if absterm < machEp*math.Abs(sum) { + break + } + absoldterm = absterm + afac /= a + } + res += sgn * math.Exp(-0.5*a*eta*eta) * sum / math.Sqrt(2*math.Pi*a) + + return res +} diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/igami.go b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/igami.go new file mode 100644 index 0000000000..bb80b9cf83 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/igami.go @@ -0,0 +1,155 @@ +// Derived from SciPy's special/cephes/igami.c +// https://github.com/scipy/scipy/blob/master/scipy/special/cephes/igami.c +// Made freely available by Stephen L. Moshier without support or guarantee. + +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Copyright ©1984, ©1987, ©1995 by Stephen L. Moshier +// Portions Copyright ©2017 The Gonum Authors. All rights reserved. + +package cephes + +import "math" + +// IgamI computes the inverse of the incomplete Gamma function. That is, it +// returns the x such that: +// +// IgamC(a, x) = p +// +// The input argument a must be positive and p must be between 0 and 1 +// inclusive or IgamI will panic. IgamI should return a positive number, but +// can return 0 even with non-zero y due to underflow. 
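+//
+// For example, IgamI(2, 0.5) is the median of a Gamma(2, 1) distribution,
+// approximately 1.678, since IgamC(2, x) = exp(-x)*(1+x) equals 0.5 there.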
+func IgamI(a, p float64) float64 { + // Bound the solution + x0 := math.MaxFloat64 + yl := 0.0 + x1 := 0.0 + yh := 1.0 + dithresh := 5.0 * machEp + + if p < 0 || p > 1 || a <= 0 { + panic(paramOutOfBounds) + } + + if p == 0 { + return math.Inf(1) + } + + if p == 1 { + return 0.0 + } + + // Starting with the approximate value + // x = a y^3 + // where + // y = 1 - d - ndtri(p) sqrt(d) + // and + // d = 1/9a + // the routine performs up to 10 Newton iterations to find the root of + // IgamC(a, x) - p = 0 + d := 1.0 / (9.0 * a) + y := 1.0 - d - Ndtri(p)*math.Sqrt(d) + x := a * y * y * y + + lgm := lgam(a) + + for i := 0; i < 10; i++ { + if x > x0 || x < x1 { + break + } + + y = IgamC(a, x) + + if y < yl || y > yh { + break + } + + if y < p { + x0 = x + yl = y + } else { + x1 = x + yh = y + } + + // Compute the derivative of the function at this point + d = (a-1)*math.Log(x) - x - lgm + if d < -maxLog { + break + } + d = -math.Exp(d) + + // Compute the step to the next approximation of x + d = (y - p) / d + if math.Abs(d/x) < machEp { + return x + } + x = x - d + } + + d = 0.0625 + if x0 == math.MaxFloat64 { + if x <= 0 { + x = 1 + } + for x0 == math.MaxFloat64 { + x = (1 + d) * x + y = IgamC(a, x) + if y < p { + x0 = x + yl = y + break + } + d = d + d + } + } + + d = 0.5 + dir := 0 + for i := 0; i < 400; i++ { + x = x1 + d*(x0-x1) + y = IgamC(a, x) + + lgm = (x0 - x1) / (x1 + x0) + if math.Abs(lgm) < dithresh { + break + } + + lgm = (y - p) / p + if math.Abs(lgm) < dithresh { + break + } + + if x <= 0 { + break + } + + if y >= p { + x1 = x + yh = y + if dir < 0 { + dir = 0 + d = 0.5 + } else if dir > 1 { + d = 0.5*d + 0.5 + } else { + d = (p - yl) / (yh - yl) + } + dir++ + } else { + x0 = x + yl = y + if dir > 0 { + dir = 0 + d = 0.5 + } else if dir < -1 { + d = 0.5 * d + } else { + d = (p - yl) / (yh - yl) + } + dir-- + } + } + + return x +} diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/incbeta.go b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/incbeta.go new file mode 100644 index 0000000000..6a818154f6 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/incbeta.go @@ -0,0 +1,312 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + * Cephes Math Library, Release 2.3: March, 1995 + * Copyright 1984, 1995 by Stephen L. Moshier + */ + +package cephes + +import ( + "math" + + "gonum.org/v1/gonum/mathext/internal/gonum" +) + +const ( + maxGam = 171.624376956302725 + big = 4.503599627370496e15 + biginv = 2.22044604925031308085e-16 +) + +// Incbet computes the regularized incomplete beta function. +func Incbet(aa, bb, xx float64) float64 { + if aa <= 0 || bb <= 0 { + panic(paramOutOfBounds) + } + if xx <= 0 || xx >= 1 { + if xx == 0 { + return 0 + } + if xx == 1 { + return 1 + } + panic(paramOutOfBounds) + } + + var flag int + if bb*xx <= 1 && xx <= 0.95 { + t := pseries(aa, bb, xx) + return transformT(t, flag) + } + + w := 1 - xx + + // Reverse a and b if x is greater than the mean. + var a, b, xc, x float64 + if xx > aa/(aa+bb) { + flag = 1 + a = bb + b = aa + xc = xx + x = w + } else { + a = aa + b = bb + xc = w + x = xx + } + + if flag == 1 && (b*x) <= 1.0 && x <= 0.95 { + t := pseries(a, b, x) + return transformT(t, flag) + } + + // Choose expansion for better convergence. 
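+	// Both continued fractions converge for these arguments; the sign of y
+	// below picks the faster one: incbcf expands in x, incbd in x/(1-x).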
+ y := x*(a+b-2.0) - (a - 1.0) + if y < 0.0 { + w = incbcf(a, b, x) + } else { + w = incbd(a, b, x) / xc + } + + // Multiply w by the factor + // x^a * (1-x)^b * Γ(a+b) / (a*Γ(a)*Γ(b)) + var t float64 + y = a * math.Log(x) + t = b * math.Log(xc) + if (a+b) < maxGam && math.Abs(y) < maxLog && math.Abs(t) < maxLog { + t = math.Pow(xc, b) + t *= math.Pow(x, a) + t /= a + t *= w + t *= 1.0 / gonum.Beta(a, b) + return transformT(t, flag) + } + + // Resort to logarithms. + y += t - gonum.Lbeta(a, b) + y += math.Log(w / a) + if y < minLog { + t = 0.0 + } else { + t = math.Exp(y) + } + + return transformT(t, flag) +} + +func transformT(t float64, flag int) float64 { + if flag == 1 { + if t <= machEp { + t = 1.0 - machEp + } else { + t = 1.0 - t + } + } + return t +} + +// incbcf returns the incomplete beta integral evaluated by a continued fraction +// expansion. +func incbcf(a, b, x float64) float64 { + var xk, pk, pkm1, pkm2, qk, qkm1, qkm2 float64 + var k1, k2, k3, k4, k5, k6, k7, k8 float64 + var r, t, ans, thresh float64 + var n int + + k1 = a + k2 = a + b + k3 = a + k4 = a + 1.0 + k5 = 1.0 + k6 = b - 1.0 + k7 = k4 + k8 = a + 2.0 + + pkm2 = 0.0 + qkm2 = 1.0 + pkm1 = 1.0 + qkm1 = 1.0 + ans = 1.0 + r = 1.0 + thresh = 3.0 * machEp + + for n = 0; n <= 300; n++ { + + xk = -(x * k1 * k2) / (k3 * k4) + pk = pkm1 + pkm2*xk + qk = qkm1 + qkm2*xk + pkm2 = pkm1 + pkm1 = pk + qkm2 = qkm1 + qkm1 = qk + + xk = (x * k5 * k6) / (k7 * k8) + pk = pkm1 + pkm2*xk + qk = qkm1 + qkm2*xk + pkm2 = pkm1 + pkm1 = pk + qkm2 = qkm1 + qkm1 = qk + + if qk != 0 { + r = pk / qk + } + if r != 0 { + t = math.Abs((ans - r) / r) + ans = r + } else { + t = 1.0 + } + + if t < thresh { + return ans + } + + k1 += 1.0 + k2 += 1.0 + k3 += 2.0 + k4 += 2.0 + k5 += 1.0 + k6 -= 1.0 + k7 += 2.0 + k8 += 2.0 + + if (math.Abs(qk) + math.Abs(pk)) > big { + pkm2 *= biginv + pkm1 *= biginv + qkm2 *= biginv + qkm1 *= biginv + } + if (math.Abs(qk) < biginv) || (math.Abs(pk) < biginv) { + pkm2 *= big + pkm1 *= big + qkm2 *= big + qkm1 *= big + } + } + + return ans +} + +// incbd returns the incomplete beta integral evaluated by a continued fraction +// expansion. +func incbd(a, b, x float64) float64 { + var xk, pk, pkm1, pkm2, qk, qkm1, qkm2 float64 + var k1, k2, k3, k4, k5, k6, k7, k8 float64 + var r, t, ans, z, thresh float64 + var n int + + k1 = a + k2 = b - 1.0 + k3 = a + k4 = a + 1.0 + k5 = 1.0 + k6 = a + b + k7 = a + 1.0 + k8 = a + 2.0 + + pkm2 = 0.0 + qkm2 = 1.0 + pkm1 = 1.0 + qkm1 = 1.0 + z = x / (1.0 - x) + ans = 1.0 + r = 1.0 + thresh = 3.0 * machEp + for n = 0; n <= 300; n++ { + + xk = -(z * k1 * k2) / (k3 * k4) + pk = pkm1 + pkm2*xk + qk = qkm1 + qkm2*xk + pkm2 = pkm1 + pkm1 = pk + qkm2 = qkm1 + qkm1 = qk + + xk = (z * k5 * k6) / (k7 * k8) + pk = pkm1 + pkm2*xk + qk = qkm1 + qkm2*xk + pkm2 = pkm1 + pkm1 = pk + qkm2 = qkm1 + qkm1 = qk + + if qk != 0 { + r = pk / qk + } + if r != 0 { + t = math.Abs((ans - r) / r) + ans = r + } else { + t = 1.0 + } + + if t < thresh { + return ans + } + + k1 += 1.0 + k2 -= 1.0 + k3 += 2.0 + k4 += 2.0 + k5 += 1.0 + k6 += 1.0 + k7 += 2.0 + k8 += 2.0 + + if (math.Abs(qk) + math.Abs(pk)) > big { + pkm2 *= biginv + pkm1 *= biginv + qkm2 *= biginv + qkm1 *= biginv + } + if (math.Abs(qk) < biginv) || (math.Abs(pk) < biginv) { + pkm2 *= big + pkm1 *= big + qkm2 *= big + qkm1 *= big + } + } + return ans +} + +// pseries returns the incomplete beta integral evaluated by a power series. Use +// when b*x is small and x not too close to 1. 
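+//
+// Explicitly, the sum accumulated below is (a restatement for reference,
+// following the cephes documentation):
+//
+//	Ix(a,b) = x^a/Beta(a,b) * (1/a + (1-b)x/(a+1) + ... + (1-b)(2-b)...(n-b) x^n/(n! (a+n)) + ...)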
+func pseries(a, b, x float64) float64 { + var s, t, u, v, n, t1, z, ai float64 + ai = 1.0 / a + u = (1.0 - b) * x + v = u / (a + 1.0) + t1 = v + t = u + n = 2.0 + s = 0.0 + z = machEp * ai + for math.Abs(v) > z { + u = (n - b) * x / n + t *= u + v = t / (a + n) + s += v + n += 1.0 + } + s += t1 + s += ai + + u = a * math.Log(x) + if (a+b) < maxGam && math.Abs(u) < maxLog { + t = 1.0 / gonum.Beta(a, b) + s = s * t * math.Pow(x, a) + } else { + t = -gonum.Lbeta(a, b) + u + math.Log(s) + if t < minLog { + s = 0.0 + } else { + s = math.Exp(t) + } + } + return (s) +} diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/incbi.go b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/incbi.go new file mode 100644 index 0000000000..2b612d83f9 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/incbi.go @@ -0,0 +1,247 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + * Cephes Math Library Release 2.4: March,1996 + * Copyright 1984, 1996 by Stephen L. Moshier + */ + +package cephes + +import "math" + +// Incbi computes the inverse of the regularized incomplete beta integral. +func Incbi(aa, bb, yy0 float64) float64 { + var a, b, y0, d, y, x, x0, x1, lgm, yp, di, dithresh, yl, yh, xt float64 + var i, rflg, dir, nflg int + + if yy0 <= 0 { + return (0.0) + } + if yy0 >= 1.0 { + return (1.0) + } + x0 = 0.0 + yl = 0.0 + x1 = 1.0 + yh = 1.0 + nflg = 0 + + if aa <= 1.0 || bb <= 1.0 { + dithresh = 1.0e-6 + rflg = 0 + a = aa + b = bb + y0 = yy0 + x = a / (a + b) + y = Incbet(a, b, x) + goto ihalve + } else { + dithresh = 1.0e-4 + } + // Approximation to inverse function + yp = -Ndtri(yy0) + + if yy0 > 0.5 { + rflg = 1 + a = bb + b = aa + y0 = 1.0 - yy0 + yp = -yp + } else { + rflg = 0 + a = aa + b = bb + y0 = yy0 + } + + lgm = (yp*yp - 3.0) / 6.0 + x = 2.0 / (1.0/(2.0*a-1.0) + 1.0/(2.0*b-1.0)) + d = yp*math.Sqrt(x+lgm)/x - (1.0/(2.0*b-1.0)-1.0/(2.0*a-1.0))*(lgm+5.0/6.0-2.0/(3.0*x)) + d = 2.0 * d + if d < minLog { + // mtherr("incbi", UNDERFLOW) + x = 0 + goto done + } + x = a / (a + b*math.Exp(d)) + y = Incbet(a, b, x) + yp = (y - y0) / y0 + if math.Abs(yp) < 0.2 { + goto newt + } + + /* Resort to interval halving if not close enough. 
*/ +ihalve: + + dir = 0 + di = 0.5 + for i = 0; i < 100; i++ { + if i != 0 { + x = x0 + di*(x1-x0) + if x == 1.0 { + x = 1.0 - machEp + } + if x == 0.0 { + di = 0.5 + x = x0 + di*(x1-x0) + if x == 0.0 { + // mtherr("incbi", UNDERFLOW) + goto done + } + } + y = Incbet(a, b, x) + yp = (x1 - x0) / (x1 + x0) + if math.Abs(yp) < dithresh { + goto newt + } + yp = (y - y0) / y0 + if math.Abs(yp) < dithresh { + goto newt + } + } + if y < y0 { + x0 = x + yl = y + if dir < 0 { + dir = 0 + di = 0.5 + } else if dir > 3 { + di = 1.0 - (1.0-di)*(1.0-di) + } else if dir > 1 { + di = 0.5*di + 0.5 + } else { + di = (y0 - y) / (yh - yl) + } + dir += 1 + if x0 > 0.75 { + if rflg == 1 { + rflg = 0 + a = aa + b = bb + y0 = yy0 + } else { + rflg = 1 + a = bb + b = aa + y0 = 1.0 - yy0 + } + x = 1.0 - x + y = Incbet(a, b, x) + x0 = 0.0 + yl = 0.0 + x1 = 1.0 + yh = 1.0 + goto ihalve + } + } else { + x1 = x + if rflg == 1 && x1 < machEp { + x = 0.0 + goto done + } + yh = y + if dir > 0 { + dir = 0 + di = 0.5 + } else if dir < -3 { + di = di * di + } else if dir < -1 { + di = 0.5 * di + } else { + di = (y - y0) / (yh - yl) + } + dir -= 1 + } + } + // mtherr("incbi", PLOSS) + if x0 >= 1.0 { + x = 1.0 - machEp + goto done + } + if x <= 0.0 { + // mtherr("incbi", UNDERFLOW) + x = 0.0 + goto done + } + +newt: + if nflg > 0 { + goto done + } + nflg = 1 + lgm = lgam(a+b) - lgam(a) - lgam(b) + + for i = 0; i < 8; i++ { + /* Compute the function at this point. */ + if i != 0 { + y = Incbet(a, b, x) + } + if y < yl { + x = x0 + y = yl + } else if y > yh { + x = x1 + y = yh + } else if y < y0 { + x0 = x + yl = y + } else { + x1 = x + yh = y + } + if x == 1.0 || x == 0.0 { + break + } + /* Compute the derivative of the function at this point. */ + d = (a-1.0)*math.Log(x) + (b-1.0)*math.Log(1.0-x) + lgm + if d < minLog { + goto done + } + if d > maxLog { + break + } + d = math.Exp(d) + /* Compute the step to the next approximation of x. */ + d = (y - y0) / d + xt = x - d + if xt <= x0 { + y = (x - x0) / (x1 - x0) + xt = x0 + 0.5*y*(x-x0) + if xt <= 0.0 { + break + } + } + if xt >= x1 { + y = (x1 - x) / (x1 - x0) + xt = x1 - 0.5*y*(x1-x) + if xt >= 1.0 { + break + } + } + x = xt + if math.Abs(d/x) < 128.0*machEp { + goto done + } + } + /* Did not converge. */ + dithresh = 256.0 * machEp + goto ihalve + +done: + + if rflg > 0 { + if x <= machEp { + x = 1.0 - machEp + } else { + x = 1.0 - x + } + } + return (x) +} + +func lgam(a float64) float64 { + lg, _ := math.Lgamma(a) + return lg +} diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/lanczos.go b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/lanczos.go new file mode 100644 index 0000000000..ec29161f3e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/lanczos.go @@ -0,0 +1,153 @@ +// Derived from SciPy's special/cephes/lanczos.c +// https://github.com/scipy/scipy/blob/master/scipy/special/cephes/lanczos.c + +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Copyright ©2006 John Maddock +// Portions Copyright ©2003 Boost +// Portions Copyright ©2016 The Gonum Authors. All rights reserved. + +package cephes + +// Optimal values for G for each N are taken from +// http://web.mala.bc.ca/pughg/phdThesis/phdThesis.pdf, +// as are the theoretical error bounds. + +// Constants calculated using the method described by Godfrey +// http://my.fit.edu/~gabdo/gamma.txt and elaborated by Toth at +// http://www.rskey.org/gamma.htm using NTL::RR at 1000 bit precision. 
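+//
+// For orientation: in the Lanczos scheme the gamma function is approximated
+// as
+//
+//	Γ(z) ≈ sqrt(2π) (z+g-0.5)^(z-0.5) e^{-(z+g-0.5)} A_g(z)
+//
+// where A_g(z) is the rational function evaluated by lanczosSum below with
+// g = lanczosG; see the references above for the exact conventions used.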
+ +var lanczosNum = [...]float64{ + 2.506628274631000270164908177133837338626, + 210.8242777515793458725097339207133627117, + 8071.672002365816210638002902272250613822, + 186056.2653952234950402949897160456992822, + 2876370.628935372441225409051620849613599, + 31426415.58540019438061423162831820536287, + 248874557.8620541565114603864132294232163, + 1439720407.311721673663223072794912393972, + 6039542586.35202800506429164430729792107, + 17921034426.03720969991975575445893111267, + 35711959237.35566804944018545154716670596, + 42919803642.64909876895789904700198885093, + 23531376880.41075968857200767445163675473, +} + +var lanczosDenom = [...]float64{ + 1, + 66, + 1925, + 32670, + 357423, + 2637558, + 13339535, + 45995730, + 105258076, + 150917976, + 120543840, + 39916800, + 0, +} + +var lanczosSumExpgScaledNum = [...]float64{ + 0.006061842346248906525783753964555936883222, + 0.5098416655656676188125178644804694509993, + 19.51992788247617482847860966235652136208, + 449.9445569063168119446858607650988409623, + 6955.999602515376140356310115515198987526, + 75999.29304014542649875303443598909137092, + 601859.6171681098786670226533699352302507, + 3481712.15498064590882071018964774556468, + 14605578.08768506808414169982791359218571, + 43338889.32467613834773723740590533316085, + 86363131.28813859145546927288977868422342, + 103794043.1163445451906271053616070238554, + 56906521.91347156388090791033559122686859, +} + +var lanczosSumExpgScaledDenom = [...]float64{ + 1, + 66, + 1925, + 32670, + 357423, + 2637558, + 13339535, + 45995730, + 105258076, + 150917976, + 120543840, + 39916800, + 0, +} + +var lanczosSumNear1D = [...]float64{ + 0.3394643171893132535170101292240837927725e-9, + -0.2499505151487868335680273909354071938387e-8, + 0.8690926181038057039526127422002498960172e-8, + -0.1933117898880828348692541394841204288047e-7, + 0.3075580174791348492737947340039992829546e-7, + -0.2752907702903126466004207345038327818713e-7, + -0.1515973019871092388943437623825208095123e-5, + 0.004785200610085071473880915854204301886437, + -0.1993758927614728757314233026257810172008, + 1.483082862367253753040442933770164111678, + -3.327150580651624233553677113928873034916, + 2.208709979316623790862569924861841433016, +} + +var lanczosSumNear2D = [...]float64{ + 0.1009141566987569892221439918230042368112e-8, + -0.7430396708998719707642735577238449585822e-8, + 0.2583592566524439230844378948704262291927e-7, + -0.5746670642147041587497159649318454348117e-7, + 0.9142922068165324132060550591210267992072e-7, + -0.8183698410724358930823737982119474130069e-7, + -0.4506604409707170077136555010018549819192e-5, + 0.01422519127192419234315002746252160965831, + -0.5926941084905061794445733628891024027949, + 4.408830289125943377923077727900630927902, + -9.8907772644920670589288081640128194231, + 6.565936202082889535528455955485877361223, +} + +const lanczosG = 6.024680040776729583740234375 + +func lanczosSum(x float64) float64 { + return ratevl(x, + lanczosNum[:], + len(lanczosNum)-1, + lanczosDenom[:], + len(lanczosDenom)-1) +} + +func lanczosSumExpgScaled(x float64) float64 { + return ratevl(x, + lanczosSumExpgScaledNum[:], + len(lanczosSumExpgScaledNum)-1, + lanczosSumExpgScaledDenom[:], + len(lanczosSumExpgScaledDenom)-1) +} + +func lanczosSumNear1(dx float64) float64 { + var result float64 + + for i, val := range lanczosSumNear1D { + k := float64(i + 1) + result += (-val * dx) / (k*dx + k*k) + } + + return result +} + +func lanczosSumNear2(dx float64) float64 { + var result float64 + x := dx + 2 + + for i, val := range 
lanczosSumNear2D { + k := float64(i + 1) + result += (-val * dx) / (x + k*x + k*k - 1) + } + + return result +} diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/ndtri.go b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/ndtri.go new file mode 100644 index 0000000000..03910ff8f4 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/ndtri.go @@ -0,0 +1,150 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + * Cephes Math Library Release 2.1: January, 1989 + * Copyright 1984, 1987, 1989 by Stephen L. Moshier + * Direct inquiries to 30 Frost Street, Cambridge, MA 02140 + */ + +package cephes + +import "math" + +// TODO(btracey): There is currently an implementation of this functionality +// in gonum/stat/distuv. Find out which implementation is better, and rectify +// by having distuv call this, or moving this implementation into +// gonum/mathext/internal/gonum. + +// math.Sqrt(2*pi) +const s2pi = 2.50662827463100050242e0 + +// approximation for 0 <= |y - 0.5| <= 3/8 +var P0 = [5]float64{ + -5.99633501014107895267e1, + 9.80010754185999661536e1, + -5.66762857469070293439e1, + 1.39312609387279679503e1, + -1.23916583867381258016e0, +} + +var Q0 = [8]float64{ + /* 1.00000000000000000000E0, */ + 1.95448858338141759834e0, + 4.67627912898881538453e0, + 8.63602421390890590575e1, + -2.25462687854119370527e2, + 2.00260212380060660359e2, + -8.20372256168333339912e1, + 1.59056225126211695515e1, + -1.18331621121330003142e0, +} + +// Approximation for interval z = math.Sqrt(-2 log y ) between 2 and 8 +// i.e., y between exp(-2) = .135 and exp(-32) = 1.27e-14. +var P1 = [9]float64{ + 4.05544892305962419923e0, + 3.15251094599893866154e1, + 5.71628192246421288162e1, + 4.40805073893200834700e1, + 1.46849561928858024014e1, + 2.18663306850790267539e0, + -1.40256079171354495875e-1, + -3.50424626827848203418e-2, + -8.57456785154685413611e-4, +} + +var Q1 = [8]float64{ + /* 1.00000000000000000000E0, */ + 1.57799883256466749731e1, + 4.53907635128879210584e1, + 4.13172038254672030440e1, + 1.50425385692907503408e1, + 2.50464946208309415979e0, + -1.42182922854787788574e-1, + -3.80806407691578277194e-2, + -9.33259480895457427372e-4, +} + +// Approximation for interval z = math.Sqrt(-2 log y ) between 8 and 64 +// i.e., y between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890. +var P2 = [9]float64{ + 3.23774891776946035970e0, + 6.91522889068984211695e0, + 3.93881025292474443415e0, + 1.33303460815807542389e0, + 2.01485389549179081538e-1, + 1.23716634817820021358e-2, + 3.01581553508235416007e-4, + 2.65806974686737550832e-6, + 6.23974539184983293730e-9, +} + +var Q2 = [8]float64{ + /* 1.00000000000000000000E0, */ + 6.02427039364742014255e0, + 3.67983563856160859403e0, + 1.37702099489081330271e0, + 2.16236993594496635890e-1, + 1.34204006088543189037e-2, + 3.28014464682127739104e-4, + 2.89247864745380683936e-6, + 6.79019408009981274425e-9, +} + +// Ndtri returns the argument, x, for which the area under the +// Gaussian probability density function (integrated from +// minus infinity to x) is equal to y. +func Ndtri(y0 float64) float64 { + // For small arguments 0 < y < exp(-2), the program computes + // z = math.Sqrt( -2.0 * math.Log(y) ); then the approximation is + // x = z - math.Log(z)/z - (1/z) P(1/z) / Q(1/z). + // There are two rational functions P/Q, one for 0 < y < exp(-32) + // and the other for y up to exp(-2). 
For larger arguments,
+	// w = y - 0.5, and x/math.Sqrt(2pi) = w + w**3 R(w**2)/S(w**2).
+	var x, y, z, y2, x0, x1 float64
+	var code int
+
+	if y0 <= 0.0 {
+		if y0 < 0 {
+			panic(paramOutOfBounds)
+		}
+		return math.Inf(-1)
+	}
+	if y0 >= 1.0 {
+		if y0 > 1 {
+			panic(paramOutOfBounds)
+		}
+		return math.Inf(1)
+	}
+	code = 1
+	y = y0
+	if y > (1.0 - 0.13533528323661269189) { /* 0.135... = exp(-2) */
+		y = 1.0 - y
+		code = 0
+	}
+
+	if y > 0.13533528323661269189 {
+		y = y - 0.5
+		y2 = y * y
+		x = y + y*(y2*polevl(y2, P0[:], 4)/p1evl(y2, Q0[:], 8))
+		x = x * s2pi
+		return (x)
+	}
+
+	x = math.Sqrt(-2.0 * math.Log(y))
+	x0 = x - math.Log(x)/x
+
+	z = 1.0 / x
+	if x < 8.0 { /* y > exp(-32) = 1.2664165549e-14 */
+		x1 = z * polevl(z, P1[:], 8) / p1evl(z, Q1[:], 8)
+	} else {
+		x1 = z * polevl(z, P2[:], 8) / p1evl(z, Q2[:], 8)
+	}
+	x = x0 - x1
+	if code != 0 {
+		x = -x
+	}
+	return (x)
+}
diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/polevl.go b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/polevl.go
new file mode 100644
index 0000000000..aec399f372
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/polevl.go
@@ -0,0 +1,84 @@
+// Derived from SciPy's special/cephes/polevl.h
+// https://github.com/scipy/scipy/blob/master/scipy/special/cephes/polevl.h
+// Made freely available by Stephen L. Moshier without support or guarantee.

+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Copyright ©1984, ©1987, ©1988 by Stephen L. Moshier
+// Portions Copyright ©2016 The Gonum Authors. All rights reserved.
+
+package cephes
+
+import "math"
+
+// polevl evaluates a polynomial of degree N
+//
+//	y = c_0 + c_1 x + c_2 x^2 + ...
+//
+// where the coefficients are stored in reverse order, i.e. coef[0] = c_n and
+// coef[n] = c_0.
+func polevl(x float64, coef []float64, n int) float64 {
+	ans := coef[0]
+	for i := 1; i <= n; i++ {
+		ans = ans*x + coef[i]
+	}
+	return ans
+}
+
+// p1evl is the same as polevl, except c_n is assumed to be 1 and is not included
+// in the slice.
+func p1evl(x float64, coef []float64, n int) float64 {
+	ans := x + coef[0]
+	for i := 1; i <= n-1; i++ {
+		ans = ans*x + coef[i]
+	}
+	return ans
+}
+
+// ratevl evaluates a rational function
+func ratevl(x float64, num []float64, m int, denom []float64, n int) float64 {
+	// Source: Holin et.
al., "Polynomial and Rational Function Evaluation", + // http://www.boost.org/doc/libs/1_61_0/libs/math/doc/html/math_toolkit/roots/rational.html + absx := math.Abs(x) + + var dir, idx int + var y float64 + if absx > 1 { + // Evaluate as a polynomial in 1/x + dir = -1 + idx = m + y = 1 / x + } else { + dir = 1 + idx = 0 + y = x + } + + // Evaluate the numerator + numAns := num[idx] + idx += dir + for i := 0; i < m; i++ { + numAns = numAns*y + num[idx] + idx += dir + } + + // Evaluate the denominator + if absx > 1 { + idx = n + } else { + idx = 0 + } + + denomAns := denom[idx] + idx += dir + for i := 0; i < n; i++ { + denomAns = denomAns*y + denom[idx] + idx += dir + } + + if absx > 1 { + pow := float64(n - m) + return math.Pow(x, pow) * numAns / denomAns + } + return numAns / denomAns +} diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/staticcheck.conf b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/staticcheck.conf new file mode 100644 index 0000000000..e7e254ff3f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/staticcheck.conf @@ -0,0 +1 @@ +checks = [] diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/unity.go b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/unity.go new file mode 100644 index 0000000000..3996e7e558 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/unity.go @@ -0,0 +1,184 @@ +// Derived from SciPy's special/cephes/unity.c +// https://github.com/scipy/scipy/blob/master/scipy/special/cephes/unity.c +// Made freely available by Stephen L. Moshier without support or guarantee. + +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Copyright ©1984, ©1996 by Stephen L. Moshier +// Portions Copyright ©2016 The Gonum Authors. All rights reserved. + +package cephes + +import "math" + +// Relative error approximations for function arguments near unity. 
+// log1p(x) = log(1+x) +// expm1(x) = exp(x) - 1 +// cosm1(x) = cos(x) - 1 +// lgam1p(x) = lgam(1+x) + +const ( + invSqrt2 = 1 / math.Sqrt2 + pi4 = math.Pi / 4 + euler = 0.577215664901532860606512090082402431 // Euler constant +) + +// Coefficients for +// +// log(1+x) = x - \frac{x^2}{2} + \frac{x^3 lP(x)}{lQ(x)} +// +// for +// +// \frac{1}{\sqrt{2}} <= x < \sqrt{2} +// +// Theoretical peak relative error = 2.32e-20 +var lP = [...]float64{ + 4.5270000862445199635215e-5, + 4.9854102823193375972212e-1, + 6.5787325942061044846969e0, + 2.9911919328553073277375e1, + 6.0949667980987787057556e1, + 5.7112963590585538103336e1, + 2.0039553499201281259648e1, +} + +var lQ = [...]float64{ + 1.5062909083469192043167e1, + 8.3047565967967209469434e1, + 2.2176239823732856465394e2, + 3.0909872225312059774938e2, + 2.1642788614495947685003e2, + 6.0118660497603843919306e1, +} + +// log1p computes +// +// log(1 + x) +func log1p(x float64) float64 { + z := 1 + x + if z < invSqrt2 || z > math.Sqrt2 { + return math.Log(z) + } + z = x * x + z = -0.5*z + x*(z*polevl(x, lP[:], 6)/p1evl(x, lQ[:], 6)) + return x + z +} + +// log1pmx computes +// +// log(1 + x) - x +func log1pmx(x float64) float64 { + if math.Abs(x) < 0.5 { + xfac := x + res := 0.0 + + var term float64 + for n := 2; n < maxIter; n++ { + xfac *= -x + term = xfac / float64(n) + res += term + if math.Abs(term) < machEp*math.Abs(res) { + break + } + } + return res + } + return log1p(x) - x +} + +// Coefficients for +// +// e^x = 1 + \frac{2x eP(x^2)}{eQ(x^2) - eP(x^2)} +// +// for +// +// -0.5 <= x <= 0.5 +var eP = [...]float64{ + 1.2617719307481059087798e-4, + 3.0299440770744196129956e-2, + 9.9999999999999999991025e-1, +} + +var eQ = [...]float64{ + 3.0019850513866445504159e-6, + 2.5244834034968410419224e-3, + 2.2726554820815502876593e-1, + 2.0000000000000000000897e0, +} + +// expm1 computes +// +// expm1(x) = e^x - 1 +func expm1(x float64) float64 { + if math.IsInf(x, 0) { + if math.IsNaN(x) || x > 0 { + return x + } + return -1 + } + if x < -0.5 || x > 0.5 { + return math.Exp(x) - 1 + } + xx := x * x + r := x * polevl(xx, eP[:], 2) + r = r / (polevl(xx, eQ[:], 3) - r) + return r + r +} + +var coscof = [...]float64{ + 4.7377507964246204691685e-14, + -1.1470284843425359765671e-11, + 2.0876754287081521758361e-9, + -2.7557319214999787979814e-7, + 2.4801587301570552304991e-5, + -1.3888888888888872993737e-3, + 4.1666666666666666609054e-2, +} + +// cosm1 computes +// +// cosm1(x) = cos(x) - 1 +func cosm1(x float64) float64 { + if x < -pi4 || x > pi4 { + return math.Cos(x) - 1 + } + xx := x * x + xx = -0.5*xx + xx*xx*polevl(xx, coscof[:], 6) + return xx +} + +// lgam1pTayler computes +// +// lgam(x + 1) +// +// around x = 0 using its Taylor series. 
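+//
+// Explicitly, the loop below accumulates (with γ the Euler constant and
+// ζ the Riemann zeta function)
+//
+//	lgam(1+x) = -γx + Σ_{n=2}^{∞} (-1)^n ζ(n) x^n / n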
+func lgam1pTaylor(x float64) float64 {
+	if x == 0 {
+		return 0
+	}
+	res := -euler * x
+	xfac := -x
+	for n := 2; n < 42; n++ {
+		nf := float64(n)
+		xfac *= -x
+		coeff := Zeta(nf, 1) * xfac / nf
+		res += coeff
+		if math.Abs(coeff) < machEp*math.Abs(res) {
+			break
+		}
+	}
+
+	return res
+}
+
+// lgam1p computes
+//
+//	lgam(x + 1)
+func lgam1p(x float64) float64 {
+	if math.Abs(x) <= 0.5 {
+		return lgam1pTaylor(x)
+	} else if math.Abs(x-1) < 0.5 {
+		return math.Log(x) + lgam1pTaylor(x-1)
+	}
+	return lgam(x + 1)
+}
diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/cephes/zeta.go b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/zeta.go
new file mode 100644
index 0000000000..0efeaa6045
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/internal/cephes/zeta.go
@@ -0,0 +1,117 @@
+// Derived from SciPy's special/cephes/zeta.c
+// https://github.com/scipy/scipy/blob/master/scipy/special/cephes/zeta.c
+// Made freely available by Stephen L. Moshier without support or guarantee.

+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Copyright ©1984, ©1987 by Stephen L. Moshier
+// Portions Copyright ©2016 The Gonum Authors. All rights reserved.
+
+package cephes
+
+import "math"
+
+// zetaCoefs are the expansion coefficients for the Euler-Maclaurin
+// summation formula:
+//
+//	\frac{(2k)!}{B_{2k}}
+//
+// where
+//
+//	B_{2k}
+//
+// are Bernoulli numbers.
+var zetaCoefs = [...]float64{
+	12.0,
+	-720.0,
+	30240.0,
+	-1209600.0,
+	47900160.0,
+	-1.307674368e12 / 691,
+	7.47242496e10,
+	-1.067062284288e16 / 3617,
+	5.109094217170944e18 / 43867,
+	-8.028576626982912e20 / 174611,
+	1.5511210043330985984e23 / 854513,
+	-1.6938241367317436694528e27 / 236364091,
+}
+
+// Zeta computes the Riemann zeta function of two arguments.
+//
+//	Zeta(x,q) = \sum_{k=0}^{\infty} (k+q)^{-x}
+//
+// Note that Zeta returns +Inf if x is 1 and will panic if x is less than 1,
+// q is either zero or a negative integer, or q is negative and x is not an
+// integer.
+//
+// Note that:
+//
+//	zeta(x,1) = zetac(x) + 1
+func Zeta(x, q float64) float64 {
+	// REFERENCE: Gradshteyn, I. S., and I. M. Ryzhik, Tables of Integrals, Series,
+	// and Products, p. 1073; Academic Press, 1980.
+	if x == 1 {
+		return math.Inf(1)
+	}
+
+	if x < 1 {
+		panic(paramOutOfBounds)
+	}
+
+	if q <= 0 {
+		if q == math.Floor(q) {
+			panic(errParamFunctionSingularity)
+		}
+		if x != math.Floor(x) {
+			panic(paramOutOfBounds) // Because q^-x not defined
+		}
+	}
+
+	// Asymptotic expansion: http://dlmf.nist.gov/25.11#E43
+	if q > 1e8 {
+		return (1/(x-1) + 1/(2*q)) * math.Pow(q, 1-x)
+	}
+
+	// The Euler-Maclaurin summation formula is used to obtain the expansion:
+	//	Zeta(x,q) = \sum_{k=1}^n (k+q)^{-x} + \frac{(n+q)^{1-x}}{x-1} - \frac{1}{2(n+q)^x} + \sum_{j=1}^{\infty} \frac{B_{2j}x(x+1)...(x+2j)}{(2j)! (n+q)^{x+2j+1}}
	// where
+	//	B_{2j}
+	// are Bernoulli numbers.
+	// Permit negative q but continue sum until n+q > 9. This case should be
+	// handled by a reflection formula. If q<0 and x is an integer, there is a
+	// relation to the polyGamma function.
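+	// The loop below sums (k+q)^{-x} directly for the leading terms; the
+	// tail of the series is then approximated with the Euler-Maclaurin
+	// correction terms built from zetaCoefs.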
+ s := math.Pow(q, -x) + a := q + i := 0 + b := 0.0 + for i < 9 || a <= 9 { + i++ + a += 1.0 + b = math.Pow(a, -x) + s += b + if math.Abs(b/s) < machEp { + return s + } + } + + w := a + s += b * w / (x - 1) + s -= 0.5 * b + a = 1.0 + k := 0.0 + for _, coef := range zetaCoefs { + a *= x + k + b /= w + t := a * b / coef + s = s + t + t = math.Abs(t / s) + if t < machEp { + return s + } + k += 1.0 + a *= x + k + b /= w + k += 1.0 + } + return s +} diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/gonum/beta.go b/vendor/gonum.org/v1/gonum/mathext/internal/gonum/beta.go new file mode 100644 index 0000000000..f1fb3587f2 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/gonum/beta.go @@ -0,0 +1,58 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" +) + +// Beta returns the value of the complete beta function B(a, b). It is defined as +// +// Γ(a)Γ(b) / Γ(a+b) +// +// Special cases are: +// +// B(a,b) returns NaN if a or b is Inf +// B(a,b) returns NaN if a and b are 0 +// B(a,b) returns NaN if a or b is NaN +// B(a,b) returns NaN if a or b is < 0 +// B(a,b) returns +Inf if a xor b is 0. +// +// See http://mathworld.wolfram.com/BetaFunction.html for more detailed information. +func Beta(a, b float64) float64 { + return math.Exp(Lbeta(a, b)) +} + +// Lbeta returns the natural logarithm of the complete beta function B(a,b). +// Lbeta is defined as: +// +// Ln(Γ(a)Γ(b)/Γ(a+b)) +// +// Special cases are: +// +// Lbeta(a,b) returns NaN if a or b is Inf +// Lbeta(a,b) returns NaN if a and b are 0 +// Lbeta(a,b) returns NaN if a or b is NaN +// Lbeta(a,b) returns NaN if a or b is < 0 +// Lbeta(a,b) returns +Inf if a xor b is 0. +func Lbeta(a, b float64) float64 { + switch { + case math.IsInf(a, +1) || math.IsInf(b, +1): + return math.NaN() + case a == 0 && b == 0: + return math.NaN() + case a < 0 || b < 0: + return math.NaN() + case math.IsNaN(a) || math.IsNaN(b): + return math.NaN() + case a == 0 || b == 0: + return math.Inf(+1) + } + + la, _ := math.Lgamma(a) + lb, _ := math.Lgamma(b) + lab, _ := math.Lgamma(a + b) + return la + lb - lab +} diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/gonum/doc.go b/vendor/gonum.org/v1/gonum/mathext/internal/gonum/doc.go new file mode 100644 index 0000000000..cbe6aa2381 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/gonum/doc.go @@ -0,0 +1,7 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gonum contains functions implemented by the gonum team. +// It is here to avoid circular imports and/or double coding of functions. +package gonum // import "gonum.org/v1/gonum/mathext/internal/gonum" diff --git a/vendor/gonum.org/v1/gonum/mathext/internal/gonum/gonum.go b/vendor/gonum.org/v1/gonum/mathext/internal/gonum/gonum.go new file mode 100644 index 0000000000..47e02ce386 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/internal/gonum/gonum.go @@ -0,0 +1,5 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+package gonum
diff --git a/vendor/gonum.org/v1/gonum/mathext/mvgamma.go b/vendor/gonum.org/v1/gonum/mathext/mvgamma.go
new file mode 100644
index 0000000000..99a9233de4
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/mvgamma.go
@@ -0,0 +1,32 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mathext
+
+import "math"
+
+const (
+	logPi = 1.14472988584940017414342735135305871164729481 // http://oeis.org/A053510
+)
+
+// MvLgamma returns the log of the multivariate Gamma function. Dim
+// must be greater than zero, and MvLgamma will return NaN if v < (dim-1)/2.
+//
+// See https://en.wikipedia.org/wiki/Multivariate_gamma_function for more
+// information.
+func MvLgamma(v float64, dim int) float64 {
+	if dim < 1 {
+		panic("mathext: negative dimension")
+	}
+	df := float64(dim)
+	if v < (df-1)*0.5 {
+		return math.NaN()
+	}
+	ans := df * (df - 1) * 0.25 * logPi
+	for i := 1; i <= dim; i++ {
+		lg, _ := math.Lgamma(v + float64(1-i)*0.5)
+		ans += lg
+	}
+	return ans
+}
diff --git a/vendor/gonum.org/v1/gonum/mathext/roots.go b/vendor/gonum.org/v1/gonum/mathext/roots.go
new file mode 100644
index 0000000000..120ce6ef26
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/mathext/roots.go
@@ -0,0 +1,181 @@
+// Derived from SciPy's special/c_misc/fsolve.c and special/c_misc/misc.h
+// https://github.com/scipy/scipy/blob/master/scipy/special/c_misc/fsolve.c
+// https://github.com/scipy/scipy/blob/master/scipy/special/c_misc/misc.h

+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mathext
+
+import "math"
+
+type objectiveFunc func(float64, []float64) float64
+
+type fSolveResult uint8
+
+const (
+	// An exact solution was found, in which case the first point on the
+	// interval is the value
+	fSolveExact fSolveResult = iota + 1
+	// Interval width is less than the tolerance
+	fSolveConverged
+	// Root-finding didn't converge in a set number of iterations
+	fSolveMaxIterations
+)
+
+const (
+	machEp = 1.0 / (1 << 53)
+)
+
+// falsePosition uses a combination of bisection and false position to find a
+// root of a function within a given interval. This is guaranteed to converge,
+// and always keeps a bounding interval, unlike Newton's method. Inputs are:
+//
+//	x1, x2: initial bounding interval
+//	f1, f2: value of f() at x1 and x2
+//	absErr, relErr: absolute and relative errors on the bounding interval
+//	bisectTil: if > 0.0, perform bisection until the width of the bounding
+//	interval is less than this
+//	f, fExtra: function to find root of is f(x, fExtra)
+//
+// Returns:
+//
+//	result: whether an exact root was found, the process converged to a
+//	bounding interval smaller than the required error, or the max number
+//	of iterations was hit
+//	bestX: best root approximation
+//	bestF: function value at bestX
+//	errEst: error estimation
func falsePosition(x1, x2, f1, f2, absErr, relErr, bisectTil float64, f objectiveFunc, fExtra []float64) (fSolveResult, float64, float64, float64) {
+	// The false position steps are either unmodified, or modified with the
+	// Anderson-Bjorck method as appropriate. Theoretically, this has a "speed of
+	// convergence" of 1.7 (bisection is 1, Newton is 2).
+	// Note that this routine was designed initially to work with gammaincinv, so
+	// it may not be tuned right for other problems.
Don't use it blindly. + + if f1*f2 >= 0 { + panic("Initial interval is not a bounding interval") + } + + const ( + maxIterations = 100 + bisectIter = 4 + bisectWidth = 4.0 + ) + + const ( + bisect = iota + 1 + falseP + ) + + var state uint8 + if bisectTil > 0 { + state = bisect + } else { + state = falseP + } + + gamma := 1.0 + + w := math.Abs(x2 - x1) + lastBisectWidth := w + + var nFalseP int + var x3, f3, bestX, bestF float64 + for i := 0; i < maxIterations; i++ { + switch state { + case bisect: + x3 = 0.5 * (x1 + x2) + if x3 == x1 || x3 == x2 { + // i.e., x1 and x2 are successive floating-point numbers + bestX = x3 + if x3 == x1 { + bestF = f1 + } else { + bestF = f2 + } + return fSolveConverged, bestX, bestF, w + } + + f3 = f(x3, fExtra) + if f3 == 0 { + return fSolveExact, x3, f3, w + } + + if f3*f2 < 0 { + x1 = x2 + f1 = f2 + } + x2 = x3 + f2 = f3 + w = math.Abs(x2 - x1) + lastBisectWidth = w + if bisectTil > 0 { + if w < bisectTil { + bisectTil = -1.0 + gamma = 1.0 + nFalseP = 0 + state = falseP + } + } else { + gamma = 1.0 + nFalseP = 0 + state = falseP + } + case falseP: + s12 := (f2 - gamma*f1) / (x2 - x1) + x3 = x2 - f2/s12 + f3 = f(x3, fExtra) + if f3 == 0 { + return fSolveExact, x3, f3, w + } + + nFalseP++ + if f3*f2 < 0 { + gamma = 1.0 + x1 = x2 + f1 = f2 + } else { + // Anderson-Bjorck method + g := 1.0 - f3/f2 + if g <= 0 { + g = 0.5 + } + gamma *= g + } + x2 = x3 + f2 = f3 + w = math.Abs(x2 - x1) + + // Sanity check. For every 4 false position checks, see if we really are + // decreasing the interval by comparing to what bisection would have + // achieved (or, rather, a bit more lenient than that -- interval + // decreased by 4 instead of by 16, as the fp could be decreasing gamma + // for a bit). Note that this should guarantee convergence, as it makes + // sure that we always end up decreasing the interval width with a + // bisection. + if nFalseP > bisectIter { + if w*bisectWidth > lastBisectWidth { + state = bisect + } + nFalseP = 0 + lastBisectWidth = w + } + } + + tol := absErr + relErr*math.Max(math.Max(math.Abs(x1), math.Abs(x2)), 1.0) + if w <= tol { + if math.Abs(f1) < math.Abs(f2) { + bestX = x1 + bestF = f1 + } else { + bestX = x2 + bestF = f2 + } + return fSolveConverged, bestX, bestF, w + } + } + + return fSolveMaxIterations, x3, f3, w +} diff --git a/vendor/gonum.org/v1/gonum/mathext/zeta.go b/vendor/gonum.org/v1/gonum/mathext/zeta.go new file mode 100644 index 0000000000..23a87fae8b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/mathext/zeta.go @@ -0,0 +1,22 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mathext + +import "gonum.org/v1/gonum/mathext/internal/cephes" + +// Zeta computes the Riemann zeta function of two arguments. +// +// Zeta(x,q) = \sum_{k=0}^{\infty} (k+q)^{-x} +// +// Note that Zeta returns +Inf if x is 1 and will panic if x is less than 1, +// q is either zero or a negative integer, or q is negative and x is not an +// integer. +// +// See http://mathworld.wolfram.com/HurwitzZetaFunction.html +// or https://en.wikipedia.org/wiki/Multiple_zeta_function#Two_parameters_case +// for more detailed information. 
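+//
+// For example (a well-known special value, shown for orientation):
+//
+//	Zeta(2, 1) // = π²/6 ≈ 1.6449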
+func Zeta(x, q float64) float64 { + return cephes.Zeta(x, q) +} diff --git a/vendor/gonum.org/v1/gonum/optimize/README.md b/vendor/gonum.org/v1/gonum/optimize/README.md new file mode 100644 index 0000000000..70a8530616 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/README.md @@ -0,0 +1,6 @@ +# Gonum optimize + +[![go.dev reference](https://pkg.go.dev/badge/gonum.org/v1/gonum/optimize)](https://pkg.go.dev/gonum.org/v1/gonum/optimize) +[![GoDoc](https://godocs.io/gonum.org/v1/gonum/optimize?status.svg)](https://godocs.io/gonum.org/v1/gonum/optimize) + +Package optimize is an optimization package for the Go language. diff --git a/vendor/gonum.org/v1/gonum/optimize/backtracking.go b/vendor/gonum.org/v1/gonum/optimize/backtracking.go new file mode 100644 index 0000000000..2ab44e44db --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/backtracking.go @@ -0,0 +1,84 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +const ( + defaultBacktrackingContraction = 0.5 + defaultBacktrackingDecrease = 1e-4 + minimumBacktrackingStepSize = 1e-20 +) + +var _ Linesearcher = (*Backtracking)(nil) + +// Backtracking is a Linesearcher that uses backtracking to find a point that +// satisfies the Armijo condition with the given decrease factor. If the Armijo +// condition has not been met, the step size is decreased by ContractionFactor. +// +// The Armijo condition only requires the gradient at the beginning of each +// major iteration (not at successive step locations), and so Backtracking may +// be a good linesearch for functions with expensive gradients. Backtracking is +// not appropriate for optimizers that require the Wolfe conditions to be met, +// such as BFGS. +// +// Both DecreaseFactor and ContractionFactor must be between zero and one, and +// Backtracking will panic otherwise. If either DecreaseFactor or +// ContractionFactor are zero, it will be set to a reasonable default. +type Backtracking struct { + DecreaseFactor float64 // Constant factor in the sufficient decrease (Armijo) condition. + ContractionFactor float64 // Step size multiplier at each iteration (step *= ContractionFactor). 
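+
+	// In the notation of the package's linesearch helpers, Iterate accepts a
+	// candidate step once
+	//
+	//	f(step) <= f(0) + DecreaseFactor*step*f'(0)
+	//
+	// holds, as tested by ArmijoConditionMet.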
+ + stepSize float64 + initF float64 + initG float64 + + lastOp Operation +} + +func (b *Backtracking) Init(f, g float64, step float64) Operation { + if step <= 0 { + panic("backtracking: bad step size") + } + if g >= 0 { + panic("backtracking: initial derivative is non-negative") + } + + if b.ContractionFactor == 0 { + b.ContractionFactor = defaultBacktrackingContraction + } + if b.DecreaseFactor == 0 { + b.DecreaseFactor = defaultBacktrackingDecrease + } + if b.ContractionFactor <= 0 || b.ContractionFactor >= 1 { + panic("backtracking: ContractionFactor must be between 0 and 1") + } + if b.DecreaseFactor <= 0 || b.DecreaseFactor >= 1 { + panic("backtracking: DecreaseFactor must be between 0 and 1") + } + + b.stepSize = step + b.initF = f + b.initG = g + + b.lastOp = FuncEvaluation + return b.lastOp +} + +func (b *Backtracking) Iterate(f, _ float64) (Operation, float64, error) { + if b.lastOp != FuncEvaluation { + panic("backtracking: Init has not been called") + } + + if ArmijoConditionMet(f, b.initF, b.initG, b.stepSize, b.DecreaseFactor) { + b.lastOp = MajorIteration + return b.lastOp, b.stepSize, nil + } + b.stepSize *= b.ContractionFactor + if b.stepSize < minimumBacktrackingStepSize { + b.lastOp = NoOperation + return b.lastOp, b.stepSize, ErrLinesearcherFailure + } + b.lastOp = FuncEvaluation + return b.lastOp, b.stepSize, nil +} diff --git a/vendor/gonum.org/v1/gonum/optimize/bfgs.go b/vendor/gonum.org/v1/gonum/optimize/bfgs.go new file mode 100644 index 0000000000..b44ef81ee0 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/bfgs.go @@ -0,0 +1,192 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import ( + "math" + + "gonum.org/v1/gonum/mat" +) + +var ( + _ Method = (*BFGS)(nil) + _ localMethod = (*BFGS)(nil) + _ NextDirectioner = (*BFGS)(nil) +) + +// BFGS implements the Broyden–Fletcher–Goldfarb–Shanno optimization method. It +// is a quasi-Newton method that performs successive rank-one updates to an +// estimate of the inverse Hessian of the objective function. It exhibits +// super-linear convergence when in proximity to a local minimum. It has memory +// cost that is O(n^2) relative to the input dimension. +type BFGS struct { + // Linesearcher selects suitable steps along the descent direction. + // Accepted steps should satisfy the strong Wolfe conditions. + // If Linesearcher == nil, an appropriate default is chosen. + Linesearcher Linesearcher + // GradStopThreshold sets the threshold for stopping if the gradient norm + // gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and + // if it is NaN the setting is not used. + GradStopThreshold float64 + + ls *LinesearchMethod + + status Status + err error + + dim int + x mat.VecDense // Location of the last major iteration. + grad mat.VecDense // Gradient at the last major iteration. + s mat.VecDense // Difference between locations in this and the previous iteration. + y mat.VecDense // Difference between gradients in this and the previous iteration. + tmp mat.VecDense + + invHess *mat.SymDense + + first bool // Indicator of the first iteration. 
+} + +func (b *BFGS) Status() (Status, error) { + return b.status, b.err +} + +func (*BFGS) Uses(has Available) (uses Available, err error) { + return has.gradient() +} + +func (b *BFGS) Init(dim, tasks int) int { + b.status = NotTerminated + b.err = nil + return 1 +} + +func (b *BFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) { + b.status, b.err = localOptimizer{}.run(b, b.GradStopThreshold, operation, result, tasks) + close(operation) +} + +func (b *BFGS) initLocal(loc *Location) (Operation, error) { + if b.Linesearcher == nil { + b.Linesearcher = &Bisection{} + } + if b.ls == nil { + b.ls = &LinesearchMethod{} + } + b.ls.Linesearcher = b.Linesearcher + b.ls.NextDirectioner = b + + return b.ls.Init(loc) +} + +func (b *BFGS) iterateLocal(loc *Location) (Operation, error) { + return b.ls.Iterate(loc) +} + +func (b *BFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) { + dim := len(loc.X) + b.dim = dim + b.first = true + + x := mat.NewVecDense(dim, loc.X) + grad := mat.NewVecDense(dim, loc.Gradient) + b.x.CloneFromVec(x) + b.grad.CloneFromVec(grad) + + b.y.Reset() + b.s.Reset() + b.tmp.Reset() + + if b.invHess == nil || cap(b.invHess.RawSymmetric().Data) < dim*dim { + b.invHess = mat.NewSymDense(dim, nil) + } else { + b.invHess = mat.NewSymDense(dim, b.invHess.RawSymmetric().Data[:dim*dim]) + } + // The values of the inverse Hessian are initialized in the first call to + // NextDirection. + + // Initial direction is just negative of the gradient because the Hessian + // is an identity matrix. + d := mat.NewVecDense(dim, dir) + d.ScaleVec(-1, grad) + return 1 / mat.Norm(d, 2) +} + +func (b *BFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) { + dim := b.dim + if len(loc.X) != dim { + panic("bfgs: unexpected size mismatch") + } + if len(loc.Gradient) != dim { + panic("bfgs: unexpected size mismatch") + } + if len(dir) != dim { + panic("bfgs: unexpected size mismatch") + } + + x := mat.NewVecDense(dim, loc.X) + grad := mat.NewVecDense(dim, loc.Gradient) + + // s = x_{k+1} - x_{k} + b.s.SubVec(x, &b.x) + // y = g_{k+1} - g_{k} + b.y.SubVec(grad, &b.grad) + + sDotY := mat.Dot(&b.s, &b.y) + + if b.first { + // Rescale the initial Hessian. + // From: Nocedal, J., Wright, S.: Numerical Optimization (2nd ed). + // Springer (2006), page 143, eq. 6.20. + yDotY := mat.Dot(&b.y, &b.y) + scale := sDotY / yDotY + for i := 0; i < dim; i++ { + for j := i; j < dim; j++ { + if i == j { + b.invHess.SetSym(i, i, scale) + } else { + b.invHess.SetSym(i, j, 0) + } + } + } + b.first = false + } + + if math.Abs(sDotY) != 0 { + // Update the inverse Hessian according to the formula + // + // B_{k+1}^-1 = B_k^-1 + // + (s_kᵀ y_k + y_kᵀ B_k^-1 y_k) / (s_kᵀ y_k)^2 * (s_k s_kᵀ) + // - (B_k^-1 y_k s_kᵀ + s_k y_kᵀ B_k^-1) / (s_kᵀ y_k). + // + // Note that y_kᵀ B_k^-1 y_k is a scalar, and that the third term is a + // rank-two update where B_k^-1 y_k is one vector and s_k is the other. + yBy := mat.Inner(&b.y, b.invHess, &b.y) + b.tmp.MulVec(b.invHess, &b.y) + scale := (1 + yBy/sDotY) / sDotY + b.invHess.SymRankOne(b.invHess, scale, &b.s) + b.invHess.RankTwo(b.invHess, -1/sDotY, &b.tmp, &b.s) + } + + // Update the stored BFGS data. + b.x.CopyVec(x) + b.grad.CopyVec(grad) + + // New direction is stored in dir. 
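+	// That is, d = -B^{-1}∇f, the quasi-Newton step direction computed from
+	// the updated inverse Hessian estimate.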
+ d := mat.NewVecDense(dim, dir) + d.MulVec(b.invHess, grad) + d.ScaleVec(-1, d) + + return 1 +} + +func (*BFGS) needs() struct { + Gradient bool + Hessian bool +} { + return struct { + Gradient bool + Hessian bool + }{true, false} +} diff --git a/vendor/gonum.org/v1/gonum/optimize/bisection.go b/vendor/gonum.org/v1/gonum/optimize/bisection.go new file mode 100644 index 0000000000..b194a2090b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/bisection.go @@ -0,0 +1,146 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import "math" + +const defaultBisectionCurvature = 0.9 + +var _ Linesearcher = (*Bisection)(nil) + +// Bisection is a Linesearcher that uses a bisection to find a point that +// satisfies the strong Wolfe conditions with the given curvature factor and +// a decrease factor of zero. +type Bisection struct { + // CurvatureFactor is the constant factor in the curvature condition. + // Smaller values result in a more exact line search. + // A set value must be in the interval (0, 1), otherwise Init will panic. + // If it is zero, it will be defaulted to 0.9. + CurvatureFactor float64 + + minStep float64 + maxStep float64 + currStep float64 + + initF float64 + minF float64 + maxF float64 + lastF float64 + + initGrad float64 + + lastOp Operation +} + +func (b *Bisection) Init(f, g float64, step float64) Operation { + if step <= 0 { + panic("bisection: bad step size") + } + if g >= 0 { + panic("bisection: initial derivative is non-negative") + } + + if b.CurvatureFactor == 0 { + b.CurvatureFactor = defaultBisectionCurvature + } + if b.CurvatureFactor <= 0 || b.CurvatureFactor >= 1 { + panic("bisection: CurvatureFactor not between 0 and 1") + } + + b.minStep = 0 + b.maxStep = math.Inf(1) + b.currStep = step + + b.initF = f + b.minF = f + b.maxF = math.NaN() + + b.initGrad = g + + // Only evaluate the gradient when necessary. + b.lastOp = FuncEvaluation + return b.lastOp +} + +func (b *Bisection) Iterate(f, g float64) (Operation, float64, error) { + if b.lastOp != FuncEvaluation && b.lastOp != GradEvaluation { + panic("bisection: Init has not been called") + } + minF := b.initF + if b.maxF < minF { + minF = b.maxF + } + if b.minF < minF { + minF = b.minF + } + if b.lastOp == FuncEvaluation { + // See if the function value is good enough to make progress. If it is, + // evaluate the gradient. If not, set it to the upper bound if the bound + // has not yet been found, otherwise iterate toward the minimum location. + if f <= minF { + b.lastF = f + b.lastOp = GradEvaluation + return b.lastOp, b.currStep, nil + } + if math.IsInf(b.maxStep, 1) { + b.maxStep = b.currStep + b.maxF = f + return b.nextStep((b.minStep + b.maxStep) / 2) + } + if b.minF <= b.maxF { + b.maxStep = b.currStep + b.maxF = f + } else { + b.minStep = b.currStep + b.minF = f + } + return b.nextStep((b.minStep + b.maxStep) / 2) + } + f = b.lastF + // The function value was lower. Check if this location is sufficient to + // converge the linesearch, otherwise iterate. + if StrongWolfeConditionsMet(f, g, minF, b.initGrad, b.currStep, 0, b.CurvatureFactor) { + b.lastOp = MajorIteration + return b.lastOp, b.currStep, nil + } + if math.IsInf(b.maxStep, 1) { + // The function value is lower. If the gradient is positive, an upper bound + // of the minimum been found. If the gradient is negative, search farther + // in that direction. 
+ if g > 0 { + b.maxStep = b.currStep + b.maxF = f + return b.nextStep((b.minStep + b.maxStep) / 2) + } + b.minStep = b.currStep + b.minF = f + return b.nextStep(b.currStep * 2) + } + // The interval has been bounded, and we have found a new lowest value. Use + // the gradient to decide which direction. + if g < 0 { + b.minStep = b.currStep + b.minF = f + } else { + b.maxStep = b.currStep + b.maxF = f + } + return b.nextStep((b.minStep + b.maxStep) / 2) +} + +// nextStep checks if the new step is equal to the old step. +// This can happen if min and max are the same, or if the step size is infinity, +// both of which indicate the minimization must stop. If the steps are different, +// it sets the new step size and returns the evaluation type and the step. If the steps +// are the same, it returns an error. +func (b *Bisection) nextStep(step float64) (Operation, float64, error) { + if b.currStep == step { + b.lastOp = NoOperation + return b.lastOp, b.currStep, ErrLinesearcherFailure + } + b.currStep = step + b.lastOp = FuncEvaluation + return b.lastOp, b.currStep, nil +} diff --git a/vendor/gonum.org/v1/gonum/optimize/cg.go b/vendor/gonum.org/v1/gonum/optimize/cg.go new file mode 100644 index 0000000000..6474b03792 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/cg.go @@ -0,0 +1,368 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import ( + "math" + + "gonum.org/v1/gonum/floats" +) + +const ( + iterationRestartFactor = 6 + angleRestartThreshold = -0.9 +) + +var ( + _ Method = (*CG)(nil) + _ localMethod = (*CG)(nil) + _ NextDirectioner = (*CG)(nil) +) + +// CGVariant calculates the scaling parameter, β, used for updating the +// conjugate direction in the nonlinear conjugate gradient (CG) method. +type CGVariant interface { + // Init is called at the first iteration and provides a way to initialize + // any internal state. + Init(loc *Location) + // Beta returns the value of the scaling parameter that is computed + // according to the particular variant of the CG method. + Beta(grad, gradPrev, dirPrev []float64) float64 +} + +var ( + _ CGVariant = (*FletcherReeves)(nil) + _ CGVariant = (*PolakRibierePolyak)(nil) + _ CGVariant = (*HestenesStiefel)(nil) + _ CGVariant = (*DaiYuan)(nil) + _ CGVariant = (*HagerZhang)(nil) +) + +// CG implements the nonlinear conjugate gradient method for solving nonlinear +// unconstrained optimization problems. It is a line search method that +// generates the search directions d_k according to the formula +// +// d_{k+1} = -∇f_{k+1} + β_k*d_k, d_0 = -∇f_0. +// +// Variants of the conjugate gradient method differ in the choice of the +// parameter β_k. The conjugate gradient method usually requires fewer function +// evaluations than the gradient descent method and no matrix storage, but +// L-BFGS is usually more efficient. +// +// CG implements a restart strategy that takes the steepest descent direction +// (i.e., d_{k+1} = -∇f_{k+1}) whenever any of the following conditions holds: +// +// - A certain number of iterations has elapsed without a restart. This number +// is controllable via IterationRestartFactor and if equal to 0, it is set to +// a reasonable default based on the problem dimension. +// - The angle between the gradients at two consecutive iterations ∇f_k and +// ∇f_{k+1} is too large. +// - The direction d_{k+1} is not a descent direction. +// - β_k returned from CGVariant.Beta is equal to zero. 
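+//
+// As a minimal usage sketch (illustrative; it assumes the package's Minimize
+// entry point with default settings):
+//
+//	p := Problem{
+//		Func: func(x []float64) float64 { return x[0]*x[0] + x[1]*x[1] },
+//		Grad: func(grad, x []float64) {
+//			grad[0] = 2 * x[0]
+//			grad[1] = 2 * x[1]
+//		},
+//	}
+//	result, err := Minimize(p, []float64{1, 1}, nil, &CG{})
+//
+// after which, err permitting, result.X should be close to the origin.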
+// +// The line search for CG must yield step sizes that satisfy the strong Wolfe +// conditions at every iteration, otherwise the generated search direction +// might fail to be a descent direction. The line search should be more +// stringent compared with those for Newton-like methods, which can be achieved +// by setting the gradient constant in the strong Wolfe conditions to a small +// value. +// +// See also William Hager, Hongchao Zhang, A survey of nonlinear conjugate +// gradient methods. Pacific Journal of Optimization, 2 (2006), pp. 35-58, and +// references therein. +type CG struct { + // Linesearcher must satisfy the strong Wolfe conditions at every iteration. + // If Linesearcher == nil, an appropriate default is chosen. + Linesearcher Linesearcher + // Variant implements the particular CG formula for computing β_k. + // If Variant is nil, an appropriate default is chosen. + Variant CGVariant + // InitialStep estimates the initial line search step size, because the CG + // method does not generate well-scaled search directions. + // If InitialStep is nil, an appropriate default is chosen. + InitialStep StepSizer + + // IterationRestartFactor determines the frequency of restarts based on the + // problem dimension. The negative gradient direction is taken whenever + // ceil(IterationRestartFactor*(problem dimension)) iterations have elapsed + // without a restart. For medium and large-scale problems + // IterationRestartFactor should be set to 1, low-dimensional problems a + // larger value should be chosen. Note that if the ceil function returns 1, + // CG will be identical to gradient descent. + // If IterationRestartFactor is 0, it will be set to 6. + // CG will panic if IterationRestartFactor is negative. + IterationRestartFactor float64 + // AngleRestartThreshold sets the threshold angle for restart. The method + // is restarted if the cosine of the angle between two consecutive + // gradients is smaller than or equal to AngleRestartThreshold, that is, if + // ∇f_k·∇f_{k+1} / (|∇f_k| |∇f_{k+1}|) <= AngleRestartThreshold. + // A value of AngleRestartThreshold closer to -1 (successive gradients in + // exact opposite directions) will tend to reduce the number of restarts. + // If AngleRestartThreshold is 0, it will be set to -0.9. + // CG will panic if AngleRestartThreshold is not in the interval [-1, 0]. + AngleRestartThreshold float64 + // GradStopThreshold sets the threshold for stopping if the gradient norm + // gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and + // if it is NaN the setting is not used. 
+ GradStopThreshold float64 + + ls *LinesearchMethod + + status Status + err error + + restartAfter int + iterFromRestart int + + dirPrev []float64 + gradPrev []float64 + gradPrevNorm float64 +} + +func (cg *CG) Status() (Status, error) { + return cg.status, cg.err +} + +func (*CG) Uses(has Available) (uses Available, err error) { + return has.gradient() +} + +func (cg *CG) Init(dim, tasks int) int { + cg.status = NotTerminated + cg.err = nil + return 1 +} + +func (cg *CG) Run(operation chan<- Task, result <-chan Task, tasks []Task) { + cg.status, cg.err = localOptimizer{}.run(cg, cg.GradStopThreshold, operation, result, tasks) + close(operation) +} + +func (cg *CG) initLocal(loc *Location) (Operation, error) { + if cg.IterationRestartFactor < 0 { + panic("cg: IterationRestartFactor is negative") + } + if cg.AngleRestartThreshold < -1 || cg.AngleRestartThreshold > 0 { + panic("cg: AngleRestartThreshold not in [-1, 0]") + } + + if cg.Linesearcher == nil { + cg.Linesearcher = &MoreThuente{CurvatureFactor: 0.1} + } + if cg.Variant == nil { + cg.Variant = &HestenesStiefel{} + } + if cg.InitialStep == nil { + cg.InitialStep = &FirstOrderStepSize{} + } + + if cg.IterationRestartFactor == 0 { + cg.IterationRestartFactor = iterationRestartFactor + } + if cg.AngleRestartThreshold == 0 { + cg.AngleRestartThreshold = angleRestartThreshold + } + + if cg.ls == nil { + cg.ls = &LinesearchMethod{} + } + cg.ls.Linesearcher = cg.Linesearcher + cg.ls.NextDirectioner = cg + + return cg.ls.Init(loc) +} + +func (cg *CG) iterateLocal(loc *Location) (Operation, error) { + return cg.ls.Iterate(loc) +} + +func (cg *CG) InitDirection(loc *Location, dir []float64) (stepSize float64) { + dim := len(loc.X) + + cg.restartAfter = int(math.Ceil(cg.IterationRestartFactor * float64(dim))) + cg.iterFromRestart = 0 + + // The initial direction is always the negative gradient. + copy(dir, loc.Gradient) + floats.Scale(-1, dir) + + cg.dirPrev = resize(cg.dirPrev, dim) + copy(cg.dirPrev, dir) + cg.gradPrev = resize(cg.gradPrev, dim) + copy(cg.gradPrev, loc.Gradient) + cg.gradPrevNorm = floats.Norm(loc.Gradient, 2) + + cg.Variant.Init(loc) + return cg.InitialStep.Init(loc, dir) +} + +func (cg *CG) NextDirection(loc *Location, dir []float64) (stepSize float64) { + copy(dir, loc.Gradient) + floats.Scale(-1, dir) + + cg.iterFromRestart++ + var restart bool + if cg.iterFromRestart == cg.restartAfter { + // Restart because too many iterations have been taken without a restart. + restart = true + } + + gDot := floats.Dot(loc.Gradient, cg.gradPrev) + gNorm := floats.Norm(loc.Gradient, 2) + if gDot <= cg.AngleRestartThreshold*gNorm*cg.gradPrevNorm { + // Restart because the angle between the last two gradients is too large. + restart = true + } + + // Compute the scaling factor β_k even when restarting, because cg.Variant + // may be keeping an inner state that needs to be updated at every iteration. + beta := cg.Variant.Beta(loc.Gradient, cg.gradPrev, cg.dirPrev) + if beta == 0 { + // β_k == 0 means that the steepest descent direction will be taken, so + // indicate that the method is in fact being restarted. + restart = true + } + if !restart { + // The method is not being restarted, so update the descent direction. + floats.AddScaled(dir, beta, cg.dirPrev) + if floats.Dot(loc.Gradient, dir) >= 0 { + // Restart because the new direction is not a descent direction. 
+ restart = true + copy(dir, loc.Gradient) + floats.Scale(-1, dir) + } + } + + // Get the initial line search step size from the StepSizer even if the + // method was restarted, because StepSizers need to see every iteration. + stepSize = cg.InitialStep.StepSize(loc, dir) + if restart { + // The method was restarted and since the steepest descent direction is + // not related to the previous direction, discard the estimated step + // size from cg.InitialStep and use step size of 1 instead. + stepSize = 1 + // Reset to 0 the counter of iterations taken since the last restart. + cg.iterFromRestart = 0 + } + + copy(cg.gradPrev, loc.Gradient) + copy(cg.dirPrev, dir) + cg.gradPrevNorm = gNorm + return stepSize +} + +func (*CG) needs() struct { + Gradient bool + Hessian bool +} { + return struct { + Gradient bool + Hessian bool + }{true, false} +} + +// FletcherReeves implements the Fletcher-Reeves variant of the CG method that +// computes the scaling parameter β_k according to the formula +// +// β_k = |∇f_{k+1}|^2 / |∇f_k|^2. +type FletcherReeves struct { + prevNorm float64 +} + +func (fr *FletcherReeves) Init(loc *Location) { + fr.prevNorm = floats.Norm(loc.Gradient, 2) +} + +func (fr *FletcherReeves) Beta(grad, _, _ []float64) (beta float64) { + norm := floats.Norm(grad, 2) + beta = (norm / fr.prevNorm) * (norm / fr.prevNorm) + fr.prevNorm = norm + return beta +} + +// PolakRibierePolyak implements the Polak-Ribiere-Polyak variant of the CG +// method that computes the scaling parameter β_k according to the formula +// +// β_k = max(0, ∇f_{k+1}·y_k / |∇f_k|^2), +// +// where y_k = ∇f_{k+1} - ∇f_k. +type PolakRibierePolyak struct { + prevNorm float64 +} + +func (pr *PolakRibierePolyak) Init(loc *Location) { + pr.prevNorm = floats.Norm(loc.Gradient, 2) +} + +func (pr *PolakRibierePolyak) Beta(grad, gradPrev, _ []float64) (beta float64) { + norm := floats.Norm(grad, 2) + dot := floats.Dot(grad, gradPrev) + beta = (norm*norm - dot) / (pr.prevNorm * pr.prevNorm) + pr.prevNorm = norm + return math.Max(0, beta) +} + +// HestenesStiefel implements the Hestenes-Stiefel variant of the CG method +// that computes the scaling parameter β_k according to the formula +// +// β_k = max(0, ∇f_{k+1}·y_k / d_k·y_k), +// +// where y_k = ∇f_{k+1} - ∇f_k. +type HestenesStiefel struct { + y []float64 +} + +func (hs *HestenesStiefel) Init(loc *Location) { + hs.y = resize(hs.y, len(loc.Gradient)) +} + +func (hs *HestenesStiefel) Beta(grad, gradPrev, dirPrev []float64) (beta float64) { + floats.SubTo(hs.y, grad, gradPrev) + beta = floats.Dot(grad, hs.y) / floats.Dot(dirPrev, hs.y) + return math.Max(0, beta) +} + +// DaiYuan implements the Dai-Yuan variant of the CG method that computes the +// scaling parameter β_k according to the formula +// +// β_k = |∇f_{k+1}|^2 / d_k·y_k, +// +// where y_k = ∇f_{k+1} - ∇f_k. +type DaiYuan struct { + y []float64 +} + +func (dy *DaiYuan) Init(loc *Location) { + dy.y = resize(dy.y, len(loc.Gradient)) +} + +func (dy *DaiYuan) Beta(grad, gradPrev, dirPrev []float64) (beta float64) { + floats.SubTo(dy.y, grad, gradPrev) + norm := floats.Norm(grad, 2) + return norm * norm / floats.Dot(dirPrev, dy.y) +} + +// HagerZhang implements the Hager-Zhang variant of the CG method that computes the +// scaling parameter β_k according to the formula +// +// β_k = (y_k - 2 d_k |y_k|^2/(d_k·y_k))·∇f_{k+1} / (d_k·y_k), +// +// where y_k = ∇f_{k+1} - ∇f_k. 
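+//
+// A minimal variant-selection sketch (illustrative only; it assumes a
+// Problem p with Func and Grad set and a starting point x0):
+//
+// method := &CG{Variant: &HagerZhang{}}
+// result, err := Minimize(p, x0, nil, method)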
+type HagerZhang struct {
+ y []float64
+}
+
+func (hz *HagerZhang) Init(loc *Location) {
+ hz.y = resize(hz.y, len(loc.Gradient))
+}
+
+func (hz *HagerZhang) Beta(grad, gradPrev, dirPrev []float64) (beta float64) {
+ floats.SubTo(hz.y, grad, gradPrev)
+ dirDotY := floats.Dot(dirPrev, hz.y)
+ gDotY := floats.Dot(grad, hz.y)
+ gDotDir := floats.Dot(grad, dirPrev)
+ yNorm := floats.Norm(hz.y, 2)
+ return (gDotY - 2*gDotDir*yNorm*yNorm/dirDotY) / dirDotY
+}
diff --git a/vendor/gonum.org/v1/gonum/optimize/cmaes.go b/vendor/gonum.org/v1/gonum/optimize/cmaes.go
new file mode 100644
index 0000000000..f635d1f000
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/optimize/cmaes.go
@@ -0,0 +1,468 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package optimize
+
+import (
+ "math"
+ "math/rand/v2"
+ "sort"
+
+ "gonum.org/v1/gonum/floats"
+ "gonum.org/v1/gonum/mat"
+ "gonum.org/v1/gonum/stat/distmv"
+)
+
+var _ Method = (*CmaEsChol)(nil)
+
+// TODO(btracey): If we ever implement the traditional CMA-ES algorithm, provide
+// the base explanation there, and modify this description to just
+// describe the differences.
+
+// CmaEsChol implements the covariance matrix adaptation evolution strategy (CMA-ES)
+// based on the Cholesky decomposition. The full algorithm is described in
+//
+// Krause, Oswin, Dídac Rodríguez Arbonès, and Christian Igel. "CMA-ES with
+// optimal covariance update and storage complexity." Advances in Neural
+// Information Processing Systems. 2016.
+// https://papers.nips.cc/paper/6457-cma-es-with-optimal-covariance-update-and-storage-complexity.pdf
+//
+// CMA-ES is a global optimization method that progressively adapts a population
+// of samples. CMA-ES combines techniques from local optimization with global
+// optimization. Specifically, the CMA-ES algorithm uses an initial multivariate
+// normal distribution to generate a population of input locations. The input locations
+// with the lowest function values are used to update the parameters of the normal
+// distribution, a new set of input locations is generated, and this procedure
+// is iterated until convergence. The initial sampling distribution will have
+// a mean specified by the initial x location, and a covariance specified by
+// the InitCholesky field.
+//
+// As the normal distribution is progressively updated according to the best samples,
+// the mean of the distribution may be updated in a gradient-descent-like
+// fashion, followed by a shrinking covariance.
+// It is recommended that the algorithm be run multiple times (with different
+// initial locations) to have a better chance of finding the global minimum.
+//
+// The CMA-ES-Chol algorithm differs from the standard CMA-ES algorithm in that
+// it directly updates the Cholesky decomposition of the normal distribution.
+// This changes the runtime from O(dimension^3) to O(dimension^2*population).
+// The evolution of the multi-variate normal will be similar to the baseline
+// CMA-ES algorithm, but the covariance update equation is not identical.
+//
+// For more information about the CMA-ES algorithm, see
+//
+// https://en.wikipedia.org/wiki/CMA-ES
+// https://arxiv.org/pdf/1604.00772.pdf
+type CmaEsChol struct {
+ // InitStepSize sets the initial size of the covariance matrix adaptation.
+ // If InitStepSize is 0, a default value of 0.3 is used. InitStepSize cannot
+ // be negative, or CmaEsChol will panic.
+ InitStepSize float64
+ // Population sets the population size for the algorithm. If Population is
+ // 0, a default value of 4 + math.Floor(3*math.Log(float64(dim))) is used.
+ // Population cannot be negative or CmaEsChol will panic.
+ Population int
+ // InitCholesky specifies the Cholesky decomposition of the covariance
+ // matrix for the initial sampling distribution. If InitCholesky is nil,
+ // a default value of I is used. If it is non-nil, then it must have
+ // InitCholesky.SymmetricDim() equal to the problem dimension.
+ InitCholesky *mat.Cholesky
+ // StopLogDet sets the threshold for stopping the optimization if the
+ // distribution becomes too peaked. The log determinant is a measure of the
+ // (log) "volume" of the normal distribution, and when it is too small
+ // the samples are almost the same. If the log determinant of the covariance
+ // matrix becomes less than StopLogDet, the optimization run is concluded.
+ // If StopLogDet is 0, a default value of dim*log(1e-16) is used.
+ // If StopLogDet is NaN, the stopping criterion is not used, though
+ // this can cause numeric instabilities in the algorithm.
+ StopLogDet float64
+ // ForgetBest, when true, does not track the best overall function value found,
+ // instead returning the new best sample in each iteration. If ForgetBest
+ // is false, then the minimum value returned will be the lowest across all
+ // iterations, regardless of when that sample was generated.
+ ForgetBest bool
+ // Src allows a random number generator to be supplied for generating samples.
+ // If Src is nil the default source of the math/rand/v2 package is used.
+ Src rand.Source
+
+ // Fixed algorithm parameters.
+ dim int
+ pop int
+ weights []float64
+ muEff float64
+ cc, cs, c1, cmu, ds float64
+ eChi float64
+
+ // Function data.
+ xs *mat.Dense
+ fs []float64
+
+ // Adaptive algorithm parameters.
+ invSigma float64 // inverse of the sigma parameter
+ pc, ps []float64
+ mean []float64
+ chol mat.Cholesky
+
+ // Overall best.
+ bestX []float64
+ bestF float64
+
+ // Synchronization.
+ sentIdx int
+ receivedIdx int
+ operation chan<- Task
+ updateErr error
+}
+
+var (
+ _ Statuser = (*CmaEsChol)(nil)
+ _ Method = (*CmaEsChol)(nil)
+)
+
+func (cma *CmaEsChol) methodConverged() Status {
+ sd := cma.StopLogDet
+ switch {
+ case math.IsNaN(sd):
+ return NotTerminated
+ case sd == 0:
+ sd = float64(cma.dim) * -36.8413614879 // ln(1e-16)
+ }
+ if cma.chol.LogDet() < sd {
+ return MethodConverge
+ }
+ return NotTerminated
+}
+
+// Status returns the status of the method.
+func (cma *CmaEsChol) Status() (Status, error) {
+ if cma.updateErr != nil {
+ return Failure, cma.updateErr
+ }
+ return cma.methodConverged(), nil
+}
+
+func (*CmaEsChol) Uses(has Available) (uses Available, err error) {
+ return has.function()
+}
+
+func (cma *CmaEsChol) Init(dim, tasks int) int {
+ if dim <= 0 {
+ panic(nonpositiveDimension)
+ }
+ if tasks < 0 {
+ panic(negativeTasks)
+ }
+
+ // Set fixed algorithm parameters.
+ // Parameter values are from https://arxiv.org/pdf/1604.00772.pdf .
+ cma.dim = dim
+ cma.pop = cma.Population
+ n := float64(dim)
+ if cma.pop == 0 {
+ cma.pop = 4 + int(3*math.Log(n)) // Note the implicit floor.
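+ // For example, dim = 10 gives a population of 4 + floor(3*ln(10)) = 10.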
+ } else if cma.pop < 0 { + panic("cma-es-chol: negative population size") + } + mu := cma.pop / 2 + cma.weights = resize(cma.weights, mu) + for i := range cma.weights { + v := math.Log(float64(mu)+0.5) - math.Log(float64(i)+1) + cma.weights[i] = v + } + floats.Scale(1/floats.Sum(cma.weights), cma.weights) + cma.muEff = 0 + for _, v := range cma.weights { + cma.muEff += v * v + } + cma.muEff = 1 / cma.muEff + + cma.cc = (4 + cma.muEff/n) / (n + 4 + 2*cma.muEff/n) + cma.cs = (cma.muEff + 2) / (n + cma.muEff + 5) + cma.c1 = 2 / ((n+1.3)*(n+1.3) + cma.muEff) + cma.cmu = math.Min(1-cma.c1, 2*(cma.muEff-2+1/cma.muEff)/((n+2)*(n+2)+cma.muEff)) + cma.ds = 1 + 2*math.Max(0, math.Sqrt((cma.muEff-1)/(n+1))-1) + cma.cs + // E[chi] is taken from https://en.wikipedia.org/wiki/CMA-ES (there + // listed as E[||N(0,1)||]). + cma.eChi = math.Sqrt(n) * (1 - 1.0/(4*n) + 1/(21*n*n)) + + // Allocate memory for function data. + cma.xs = mat.NewDense(cma.pop, dim, nil) + cma.fs = resize(cma.fs, cma.pop) + + // Allocate and initialize adaptive parameters. + cma.invSigma = 1 / cma.InitStepSize + if cma.InitStepSize == 0 { + cma.invSigma = 10.0 / 3 + } else if cma.InitStepSize < 0 { + panic("cma-es-chol: negative initial step size") + } + cma.pc = resize(cma.pc, dim) + for i := range cma.pc { + cma.pc[i] = 0 + } + cma.ps = resize(cma.ps, dim) + for i := range cma.ps { + cma.ps[i] = 0 + } + cma.mean = resize(cma.mean, dim) // mean location initialized at the start of Run + + if cma.InitCholesky != nil { + if cma.InitCholesky.SymmetricDim() != dim { + panic("cma-es-chol: incorrect InitCholesky size") + } + cma.chol.Clone(cma.InitCholesky) + } else { + // Set the initial Cholesky to I. + b := mat.NewDiagDense(dim, nil) + for i := 0; i < dim; i++ { + b.SetDiag(i, 1) + } + var chol mat.Cholesky + ok := chol.Factorize(b) + if !ok { + panic("cma-es-chol: bad cholesky. shouldn't happen") + } + cma.chol = chol + } + + cma.bestX = resize(cma.bestX, dim) + cma.bestF = math.Inf(1) + + cma.sentIdx = 0 + cma.receivedIdx = 0 + cma.operation = nil + cma.updateErr = nil + t := min(tasks, cma.pop) + return t +} + +func (cma *CmaEsChol) sendInitTasks(tasks []Task) { + for i, task := range tasks { + cma.sendTask(i, task) + } + cma.sentIdx = len(tasks) +} + +// sendTask generates a sample and sends the task. It does not update the cma index. +func (cma *CmaEsChol) sendTask(idx int, task Task) { + task.ID = idx + task.Op = FuncEvaluation + distmv.NormalRand(cma.xs.RawRowView(idx), cma.mean, &cma.chol, cma.Src) + copy(task.X, cma.xs.RawRowView(idx)) + cma.operation <- task +} + +// bestIdx returns the best index in the functions. Returns -1 if all values +// are NaN. +func (cma *CmaEsChol) bestIdx() int { + best := -1 + bestVal := math.Inf(1) + for i, v := range cma.fs { + if math.IsNaN(v) { + continue + } + // Use equality in case somewhere evaluates to +inf. + if v <= bestVal { + best = i + bestVal = v + } + } + return best +} + +// findBestAndUpdateTask finds the best task in the current list, updates the +// new best overall, and then stores the best location into task. +func (cma *CmaEsChol) findBestAndUpdateTask(task Task) Task { + // Find and update the best location. + // Don't use floats because there may be NaN values. 
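+ // bestIdx skips NaN entries and returns -1 if every sample is NaN,
+ // in which case bestF below remains NaN.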
+ best := cma.bestIdx()
+ bestF := math.NaN()
+ bestX := cma.xs.RawRowView(0)
+ if best != -1 {
+ bestF = cma.fs[best]
+ bestX = cma.xs.RawRowView(best)
+ }
+ if cma.ForgetBest {
+ task.F = bestF
+ copy(task.X, bestX)
+ } else {
+ if bestF < cma.bestF {
+ cma.bestF = bestF
+ copy(cma.bestX, bestX)
+ }
+ task.F = cma.bestF
+ copy(task.X, cma.bestX)
+ }
+ return task
+}
+
+func (cma *CmaEsChol) Run(operations chan<- Task, results <-chan Task, tasks []Task) {
+ copy(cma.mean, tasks[0].X)
+ cma.operation = operations
+ // Send the initial tasks. We know there are at most as many tasks as elements
+ // of the population.
+ cma.sendInitTasks(tasks)
+
+Loop:
+ for {
+ result := <-results
+ switch result.Op {
+ default:
+ panic("unknown operation")
+ case PostIteration:
+ break Loop
+ case MajorIteration:
+ // The last thing we did was update all of the tasks and send the
+ // major iteration. Now we can send a group of tasks again.
+ cma.sendInitTasks(tasks)
+ case FuncEvaluation:
+ cma.receivedIdx++
+ cma.fs[result.ID] = result.F
+ switch {
+ case cma.sentIdx < cma.pop:
+ // There are still tasks to evaluate. Send the next.
+ cma.sendTask(cma.sentIdx, result)
+ cma.sentIdx++
+ case cma.receivedIdx < cma.pop:
+ // All the tasks have been sent, but not all of them have been received.
+ // Need to wait until all are back.
+ continue Loop
+ default:
+ // All of the evaluations have been received.
+ if cma.receivedIdx != cma.pop {
+ panic("bad logic")
+ }
+ cma.receivedIdx = 0
+ cma.sentIdx = 0
+
+ task := cma.findBestAndUpdateTask(result)
+ // Update the parameters and send a MajorIteration or a convergence.
+ err := cma.update()
+ // Kill the existing data.
+ for i := range cma.fs {
+ cma.fs[i] = math.NaN()
+ cma.xs.Set(i, 0, math.NaN())
+ }
+ switch {
+ case err != nil:
+ cma.updateErr = err
+ task.Op = MethodDone
+ case cma.methodConverged() != NotTerminated:
+ task.Op = MethodDone
+ default:
+ task.Op = MajorIteration
+ task.ID = -1
+ }
+ operations <- task
+ }
+ }
+ }
+
+ // Been told to stop. Clean up: drain the remaining results and keep
+ // track of the best of the tasks evaluated so far.
+ for task := range results {
+ switch task.Op {
+ case MajorIteration:
+ case FuncEvaluation:
+ cma.fs[task.ID] = task.F
+ default:
+ panic("unknown operation")
+ }
+ }
+ // Send the new best value if the evaluation is better than any we've
+ // found so far. Keep this separate from findBestAndUpdateTask so that
+ // we only send an iteration if we find a better location.
+ if !cma.ForgetBest {
+ best := cma.bestIdx()
+ if best != -1 && cma.fs[best] < cma.bestF {
+ task := tasks[0]
+ task.F = cma.fs[best]
+ copy(task.X, cma.xs.RawRowView(best))
+ task.Op = MajorIteration
+ task.ID = -1
+ operations <- task
+ }
+ }
+ close(operations)
+}
+
+// update computes the new parameters (mean, cholesky, etc.). Does not update
+// any of the synchronization parameters (sentIdx, receivedIdx).
+func (cma *CmaEsChol) update() error {
+ // Sort the function values to find the elite samples.
+ ftmp := make([]float64, cma.pop)
+ copy(ftmp, cma.fs)
+ indexes := make([]int, cma.pop)
+ for i := range indexes {
+ indexes[i] = i
+ }
+ sort.Sort(bestSorter{F: ftmp, Idx: indexes})
+
+ meanOld := make([]float64, len(cma.mean))
+ copy(meanOld, cma.mean)
+
+ // m_{t+1} = \sum_{i=1}^mu w_i x_i
+ for i := range cma.mean {
+ cma.mean[i] = 0
+ }
+ for i, w := range cma.weights {
+ idx := indexes[i] // index of the i-th best (elite) sample.
+ floats.AddScaled(cma.mean, w, cma.xs.RawRowView(idx))
+ }
+ meanDiff := make([]float64, len(cma.mean))
+ floats.SubTo(meanDiff, cma.mean, meanOld)
+
+ // p_{c,t+1} = (1-c_c) p_{c,t} + \sqrt(c_c*(2-c_c)*mueff) (m_{t+1}-m_t)/sigma_t
+ floats.Scale(1-cma.cc, cma.pc)
+ scaleC := math.Sqrt(cma.cc*(2-cma.cc)*cma.muEff) * cma.invSigma
+ floats.AddScaled(cma.pc, scaleC, meanDiff)
+
+ // p_{sigma, t+1} = (1-c_sigma) p_{sigma,t} + \sqrt(c_s*(2-c_s)*mueff) A_t^-1 (m_{t+1}-m_t)/sigma_t
+ floats.Scale(1-cma.cs, cma.ps)
+ // First compute A_t^-1 (m_{t+1}-m_t), then add the scaled vector.
+ tmp := make([]float64, cma.dim)
+ tmpVec := mat.NewVecDense(cma.dim, tmp)
+ diffVec := mat.NewVecDense(cma.dim, meanDiff)
+ err := tmpVec.SolveVec(cma.chol.RawU().T(), diffVec)
+ if err != nil {
+ return err
+ }
+ scaleS := math.Sqrt(cma.cs*(2-cma.cs)*cma.muEff) * cma.invSigma
+ floats.AddScaled(cma.ps, scaleS, tmp)
+
+ // Compute the update to A.
+ scaleChol := 1 - cma.c1 - cma.cmu
+ if scaleChol == 0 {
+ scaleChol = math.SmallestNonzeroFloat64 // enough to kill the old data, but still non-zero.
+ }
+ cma.chol.Scale(scaleChol, &cma.chol)
+ cma.chol.SymRankOne(&cma.chol, cma.c1, mat.NewVecDense(cma.dim, cma.pc))
+ for i, w := range cma.weights {
+ idx := indexes[i]
+ floats.SubTo(tmp, cma.xs.RawRowView(idx), meanOld)
+ cma.chol.SymRankOne(&cma.chol, cma.cmu*w*cma.invSigma, tmpVec)
+ }
+
+ // sigma_{t+1} = sigma_t * exp(c_sigma/d_sigma * (|p_{sigma,t+1}|/E[chi] - 1))
+ normPs := floats.Norm(cma.ps, 2)
+ cma.invSigma /= math.Exp(cma.cs / cma.ds * (normPs/cma.eChi - 1))
+ return nil
+}
+
+type bestSorter struct {
+ F []float64
+ Idx []int
+}
+
+func (b bestSorter) Len() int {
+ return len(b.F)
+}
+func (b bestSorter) Less(i, j int) bool {
+ return b.F[i] < b.F[j]
+}
+func (b bestSorter) Swap(i, j int) {
+ b.F[i], b.F[j] = b.F[j], b.F[i]
+ b.Idx[i], b.Idx[j] = b.Idx[j], b.Idx[i]
+}
diff --git a/vendor/gonum.org/v1/gonum/optimize/doc.go b/vendor/gonum.org/v1/gonum/optimize/doc.go
new file mode 100644
index 0000000000..667e8f94e9
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/optimize/doc.go
@@ -0,0 +1,6 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package optimize implements algorithms for finding the optimum value of functions.
+package optimize // import "gonum.org/v1/gonum/optimize"
diff --git a/vendor/gonum.org/v1/gonum/optimize/errors.go b/vendor/gonum.org/v1/gonum/optimize/errors.go
new file mode 100644
index 0000000000..7d6f8aee02
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/optimize/errors.go
@@ -0,0 +1,78 @@
+// Copyright ©2014 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package optimize
+
+import (
+ "errors"
+ "fmt"
+ "math"
+)
+
+var (
+ // ErrZeroDimensional signifies an optimization was called with an input of length 0.
+ ErrZeroDimensional = errors.New("optimize: zero dimensional input")
+
+ // ErrLinesearcherFailure signifies that a Linesearcher has iterated too
+ // many times. This may occur if the gradient tolerance is set too low.
+ ErrLinesearcherFailure = errors.New("linesearch: failed to converge")
+
+ // ErrNonDescentDirection signifies that LinesearchMethod has received a
+ // search direction from a NextDirectioner in which the function is not
+ // decreasing.
+ ErrNonDescentDirection = errors.New("linesearch: non-descent search direction") + + // ErrNoProgress signifies that LinesearchMethod cannot make further + // progress because there is no change in location after Linesearcher step + // due to floating-point arithmetic. + ErrNoProgress = errors.New("linesearch: no change in location after Linesearcher step") + + // ErrLinesearcherBound signifies that a Linesearcher reached a step that + // lies out of allowed bounds. + ErrLinesearcherBound = errors.New("linesearch: step out of bounds") + + // ErrMissingGrad signifies that a Method requires a Gradient function that + // is not supplied by Problem. + ErrMissingGrad = errors.New("optimize: problem does not provide needed Grad function") + + // ErrMissingHess signifies that a Method requires a Hessian function that + // is not supplied by Problem. + ErrMissingHess = errors.New("optimize: problem does not provide needed Hess function") +) + +// ErrFunc is returned when an initial function value is invalid. The error +// state may be either +Inf or NaN. ErrFunc satisfies the error interface. +type ErrFunc float64 + +func (err ErrFunc) Error() string { + switch { + case math.IsInf(float64(err), 1): + return "optimize: initial function value is infinite" + case math.IsNaN(float64(err)): + return "optimize: initial function value is NaN" + default: + panic("optimize: bad ErrFunc") + } +} + +// ErrGrad is returned when an initial gradient is invalid. The error gradient +// may be either ±Inf or NaN. ErrGrad satisfies the error interface. +type ErrGrad struct { + Grad float64 // Grad is the invalid gradient value. + Index int // Index is the position at which the invalid gradient was found. +} + +func (err ErrGrad) Error() string { + switch { + case math.IsInf(err.Grad, 0): + return fmt.Sprintf("optimize: initial gradient is infinite at position %d", err.Index) + case math.IsNaN(err.Grad): + return fmt.Sprintf("optimize: initial gradient is NaN at position %d", err.Index) + default: + panic("optimize: bad ErrGrad") + } +} + +// List of shared panic strings +const badProblem = "optimize: objective function is undefined" diff --git a/vendor/gonum.org/v1/gonum/optimize/functionconvergence.go b/vendor/gonum.org/v1/gonum/optimize/functionconvergence.go new file mode 100644 index 0000000000..d5b12c307d --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/functionconvergence.go @@ -0,0 +1,85 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import ( + "math" +) + +// Converger returns the convergence of the optimization based on +// locations found during optimization. Converger must not modify the value of +// the provided Location in any of the methods. +type Converger interface { + Init(dim int) + Converged(loc *Location) Status +} + +var ( + _ Converger = NeverTerminate{} + _ Converger = (*FunctionConverge)(nil) +) + +// NeverTerminate implements Converger, always reporting NotTerminated. +type NeverTerminate struct{} + +func (NeverTerminate) Init(dim int) {} + +func (NeverTerminate) Converged(loc *Location) Status { + return NotTerminated +} + +// FunctionConverge tests for insufficient improvement in the optimum value +// over the last iterations. A FunctionConvergence status is returned if +// there is no significant decrease for FunctionConverge.Iterations. 
A +// significant decrease is considered if +// +// f < f_best +// +// and +// +// f_best - f > FunctionConverge.Relative * maxabs(f, f_best) + FunctionConverge.Absolute +// +// If the decrease is significant, then the iteration counter is reset and +// f_best is updated. +// +// If FunctionConverge.Iterations == 0, it has no effect. +type FunctionConverge struct { + Absolute float64 + Relative float64 + Iterations int + + first bool + best float64 + iter int +} + +func (fc *FunctionConverge) Init(dim int) { + fc.first = true + fc.best = 0 + fc.iter = 0 +} + +func (fc *FunctionConverge) Converged(l *Location) Status { + f := l.F + if fc.first { + fc.best = f + fc.first = false + return NotTerminated + } + if fc.Iterations == 0 { + return NotTerminated + } + maxAbs := math.Max(math.Abs(f), math.Abs(fc.best)) + if f < fc.best && fc.best-f > fc.Relative*maxAbs+fc.Absolute { + fc.best = f + fc.iter = 0 + return NotTerminated + } + fc.iter++ + if fc.iter < fc.Iterations { + return NotTerminated + } + return FunctionConvergence +} diff --git a/vendor/gonum.org/v1/gonum/optimize/gradientdescent.go b/vendor/gonum.org/v1/gonum/optimize/gradientdescent.go new file mode 100644 index 0000000000..d11896594a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/gradientdescent.go @@ -0,0 +1,95 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import "gonum.org/v1/gonum/floats" + +var ( + _ Method = (*GradientDescent)(nil) + _ localMethod = (*GradientDescent)(nil) + _ NextDirectioner = (*GradientDescent)(nil) +) + +// GradientDescent implements the steepest descent optimization method that +// performs successive steps along the direction of the negative gradient. +type GradientDescent struct { + // Linesearcher selects suitable steps along the descent direction. + // If Linesearcher is nil, a reasonable default will be chosen. + Linesearcher Linesearcher + // StepSizer determines the initial step size along each direction. + // If StepSizer is nil, a reasonable default will be chosen. + StepSizer StepSizer + // GradStopThreshold sets the threshold for stopping if the gradient norm + // gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and + // if it is NaN the setting is not used. 
+ GradStopThreshold float64 + + ls *LinesearchMethod + + status Status + err error +} + +func (g *GradientDescent) Status() (Status, error) { + return g.status, g.err +} + +func (*GradientDescent) Uses(has Available) (uses Available, err error) { + return has.gradient() +} + +func (g *GradientDescent) Init(dim, tasks int) int { + g.status = NotTerminated + g.err = nil + return 1 +} + +func (g *GradientDescent) Run(operation chan<- Task, result <-chan Task, tasks []Task) { + g.status, g.err = localOptimizer{}.run(g, g.GradStopThreshold, operation, result, tasks) + close(operation) +} + +func (g *GradientDescent) initLocal(loc *Location) (Operation, error) { + if g.Linesearcher == nil { + g.Linesearcher = &Backtracking{} + } + if g.StepSizer == nil { + g.StepSizer = &QuadraticStepSize{} + } + + if g.ls == nil { + g.ls = &LinesearchMethod{} + } + g.ls.Linesearcher = g.Linesearcher + g.ls.NextDirectioner = g + + return g.ls.Init(loc) +} + +func (g *GradientDescent) iterateLocal(loc *Location) (Operation, error) { + return g.ls.Iterate(loc) +} + +func (g *GradientDescent) InitDirection(loc *Location, dir []float64) (stepSize float64) { + copy(dir, loc.Gradient) + floats.Scale(-1, dir) + return g.StepSizer.Init(loc, dir) +} + +func (g *GradientDescent) NextDirection(loc *Location, dir []float64) (stepSize float64) { + copy(dir, loc.Gradient) + floats.Scale(-1, dir) + return g.StepSizer.StepSize(loc, dir) +} + +func (*GradientDescent) needs() struct { + Gradient bool + Hessian bool +} { + return struct { + Gradient bool + Hessian bool + }{true, false} +} diff --git a/vendor/gonum.org/v1/gonum/optimize/guessandcheck.go b/vendor/gonum.org/v1/gonum/optimize/guessandcheck.go new file mode 100644 index 0000000000..35d2792681 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/guessandcheck.go @@ -0,0 +1,92 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import ( + "math" + + "gonum.org/v1/gonum/stat/distmv" +) + +var _ Method = (*GuessAndCheck)(nil) + +// GuessAndCheck is a global optimizer that evaluates the function at random +// locations. Not a good optimizer, but useful for comparison and debugging. +type GuessAndCheck struct { + Rander distmv.Rander + + bestF float64 + bestX []float64 +} + +func (*GuessAndCheck) Uses(has Available) (uses Available, err error) { + return has.function() +} + +func (g *GuessAndCheck) Init(dim, tasks int) int { + if dim <= 0 { + panic(nonpositiveDimension) + } + if tasks < 0 { + panic(negativeTasks) + } + g.bestF = math.Inf(1) + g.bestX = resize(g.bestX, dim) + return tasks +} + +func (g *GuessAndCheck) sendNewLoc(operation chan<- Task, task Task) { + g.Rander.Rand(task.X) + task.Op = FuncEvaluation + operation <- task +} + +func (g *GuessAndCheck) updateMajor(operation chan<- Task, task Task) { + // Update the best value seen so far, and send a MajorIteration. + if task.F < g.bestF { + g.bestF = task.F + copy(g.bestX, task.X) + } else { + task.F = g.bestF + copy(task.X, g.bestX) + } + task.Op = MajorIteration + operation <- task +} + +func (g *GuessAndCheck) Run(operation chan<- Task, result <-chan Task, tasks []Task) { + // Send initial tasks to evaluate + for _, task := range tasks { + g.sendNewLoc(operation, task) + } + + // Read from the channel until PostIteration is sent. 
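+ // Each FuncEvaluation result is folded into the running best by
+ // updateMajor, which replies with a MajorIteration; each MajorIteration
+ // result triggers a fresh random sample via sendNewLoc.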
+Loop: + for { + task := <-result + switch task.Op { + default: + panic("unknown operation") + case PostIteration: + break Loop + case MajorIteration: + g.sendNewLoc(operation, task) + case FuncEvaluation: + g.updateMajor(operation, task) + } + } + + // PostIteration was sent. Update the best new values. + for task := range result { + switch task.Op { + default: + panic("unknown operation") + case MajorIteration: + case FuncEvaluation: + g.updateMajor(operation, task) + } + } + close(operation) +} diff --git a/vendor/gonum.org/v1/gonum/optimize/interfaces.go b/vendor/gonum.org/v1/gonum/optimize/interfaces.go new file mode 100644 index 0000000000..09d395a2f8 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/interfaces.go @@ -0,0 +1,132 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +// A localMethod can optimize an objective function. +// +// It uses a reverse-communication interface between the optimization method +// and the caller. Method acts as a client that asks the caller to perform +// needed operations via Operation returned from Init and Iterate methods. +// This provides independence of the optimization algorithm on user-supplied +// data and their representation, and enables automation of common operations +// like checking for (various types of) convergence and maintaining statistics. +// +// A Method can command an Evaluation, a MajorIteration or NoOperation operations. +// +// An evaluation operation is one or more of the Evaluation operations +// (FuncEvaluation, GradEvaluation, etc.) which can be combined with +// the bitwise or operator. In an evaluation operation, the requested fields of +// Problem will be evaluated at the point specified in Location.X. +// The corresponding fields of Location will be filled with the results that +// can be retrieved upon the next call to Iterate. The Method interface +// requires that entries of Location are not modified aside from the commanded +// evaluations. Thus, the type implementing Method may use multiple Operations +// to set the Location fields at a particular x value. +// +// Instead of an Evaluation, a Method may declare MajorIteration. In +// a MajorIteration, the values in the fields of Location are treated as +// a potential optimizer. The convergence of the optimization routine +// (GradientThreshold, etc.) is checked at this new best point. In +// a MajorIteration, the fields of Location must be valid and consistent. +// +// A Method must not return InitIteration and PostIteration operations. These are +// reserved for the clients to be passed to Recorders. A Method must also not +// combine the Evaluation operations with the Iteration operations. +type localMethod interface { + // Init initializes the method based on the initial data in loc, updates it + // and returns the first operation to be carried out by the caller. + // The initial location must be valid as specified by Needs. + initLocal(loc *Location) (Operation, error) + + // Iterate retrieves data from loc, performs one iteration of the method, + // updates loc and returns the next operation. + iterateLocal(loc *Location) (Operation, error) + + needser +} + +type needser interface { + // needs specifies information about the objective function needed by the + // optimizer beyond just the function value. 
The information is used + // internally for initialization and must match evaluation types returned + // by Init and Iterate during the optimization process. + needs() struct { + Gradient bool + Hessian bool + } +} + +// Statuser can report the status and any error. It is intended for methods as +// an additional error reporting mechanism apart from the errors returned from +// Init and Iterate. +type Statuser interface { + Status() (Status, error) +} + +// Linesearcher is a type that can perform a line search. It tries to find an +// (approximate) minimum of the objective function along the search direction +// dir_k starting at the most recent location x_k, i.e., it tries to minimize +// the function +// +// φ(step) := f(x_k + step * dir_k) where step > 0. +// +// Typically, a Linesearcher will be used in conjunction with LinesearchMethod +// for performing gradient-based optimization through sequential line searches. +type Linesearcher interface { + // Init initializes the Linesearcher and a new line search. Value and + // derivative contain φ(0) and φ'(0), respectively, and step contains the + // first trial step length. It returns an Operation that must be one of + // FuncEvaluation, GradEvaluation, FuncEvaluation|GradEvaluation. The + // caller must evaluate φ(step), φ'(step), or both, respectively, and pass + // the result to Linesearcher in value and derivative arguments to Iterate. + Init(value, derivative float64, step float64) Operation + + // Iterate takes in the values of φ and φ' evaluated at the previous step + // and returns the next operation. + // + // If op is one of FuncEvaluation, GradEvaluation, + // FuncEvaluation|GradEvaluation, the caller must evaluate φ(step), + // φ'(step), or both, respectively, and pass the result to Linesearcher in + // value and derivative arguments on the next call to Iterate. + // + // If op is MajorIteration, a sufficiently accurate minimum of φ has been + // found at the previous step and the line search has concluded. Init must + // be called again to initialize a new line search. + // + // If err is nil, op must not specify another operation. If err is not nil, + // the values of op and step are undefined. + Iterate(value, derivative float64) (op Operation, step float64, err error) +} + +// NextDirectioner implements a strategy for computing a new line search +// direction at each major iteration. Typically, a NextDirectioner will be +// used in conjunction with LinesearchMethod for performing gradient-based +// optimization through sequential line searches. +type NextDirectioner interface { + // InitDirection initializes the NextDirectioner at the given starting location, + // putting the initial direction in place into dir, and returning the initial + // step size. InitDirection must not modify Location. + InitDirection(loc *Location, dir []float64) (step float64) + + // NextDirection updates the search direction and step size. Location is + // the location seen at the conclusion of the most recent linesearch. The + // next search direction is put in place into dir, and the next step size + // is returned. NextDirection must not modify Location. + NextDirection(loc *Location, dir []float64) (step float64) +} + +// StepSizer can set the next step size of the optimization given the last Location. +// Returned step size must be positive. 
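+//
+// Implementations in this package include FirstOrderStepSize and
+// QuadraticStepSize.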
+type StepSizer interface { + Init(loc *Location, dir []float64) float64 + StepSize(loc *Location, dir []float64) float64 +} + +// A Recorder can record the progress of the optimization, for example to print +// the progress to StdOut or to a log file. A Recorder must not modify any data. +type Recorder interface { + Init() error + Record(*Location, Operation, *Stats) error +} diff --git a/vendor/gonum.org/v1/gonum/optimize/lbfgs.go b/vendor/gonum.org/v1/gonum/optimize/lbfgs.go new file mode 100644 index 0000000000..6caad9c330 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/lbfgs.go @@ -0,0 +1,199 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import ( + "gonum.org/v1/gonum/floats" +) + +var ( + _ Method = (*LBFGS)(nil) + _ localMethod = (*LBFGS)(nil) + _ NextDirectioner = (*LBFGS)(nil) +) + +// LBFGS implements the limited-memory BFGS method for gradient-based +// unconstrained minimization. +// +// It stores a modified version of the inverse Hessian approximation H +// implicitly from the last Store iterations while the normal BFGS method +// stores and manipulates H directly as a dense matrix. Therefore LBFGS is more +// appropriate than BFGS for large problems as the cost of LBFGS scales as +// O(Store * dim) while BFGS scales as O(dim^2). The "forgetful" nature of +// LBFGS may also make it perform better than BFGS for functions with Hessians +// that vary rapidly spatially. +type LBFGS struct { + // Linesearcher selects suitable steps along the descent direction. + // Accepted steps should satisfy the strong Wolfe conditions. + // If Linesearcher is nil, a reasonable default will be chosen. + Linesearcher Linesearcher + // Store is the size of the limited-memory storage. + // If Store is 0, it will be defaulted to 15. + Store int + // GradStopThreshold sets the threshold for stopping if the gradient norm + // gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and + // if it is NaN the setting is not used. 
+ GradStopThreshold float64 + + status Status + err error + + ls *LinesearchMethod + + dim int // Dimension of the problem + x []float64 // Location at the last major iteration + grad []float64 // Gradient at the last major iteration + + // History + oldest int // Index of the oldest element of the history + y [][]float64 // Last Store values of y + s [][]float64 // Last Store values of s + rho []float64 // Last Store values of rho + a []float64 // Cache of Hessian updates +} + +func (l *LBFGS) Status() (Status, error) { + return l.status, l.err +} + +func (*LBFGS) Uses(has Available) (uses Available, err error) { + return has.gradient() +} + +func (l *LBFGS) Init(dim, tasks int) int { + l.status = NotTerminated + l.err = nil + return 1 +} + +func (l *LBFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) { + l.status, l.err = localOptimizer{}.run(l, l.GradStopThreshold, operation, result, tasks) + close(operation) +} + +func (l *LBFGS) initLocal(loc *Location) (Operation, error) { + if l.Linesearcher == nil { + l.Linesearcher = &Bisection{} + } + if l.Store == 0 { + l.Store = 15 + } + + if l.ls == nil { + l.ls = &LinesearchMethod{} + } + l.ls.Linesearcher = l.Linesearcher + l.ls.NextDirectioner = l + + return l.ls.Init(loc) +} + +func (l *LBFGS) iterateLocal(loc *Location) (Operation, error) { + return l.ls.Iterate(loc) +} + +func (l *LBFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) { + dim := len(loc.X) + l.dim = dim + l.oldest = 0 + + l.a = resize(l.a, l.Store) + l.rho = resize(l.rho, l.Store) + l.y = l.initHistory(l.y) + l.s = l.initHistory(l.s) + + l.x = resize(l.x, dim) + copy(l.x, loc.X) + + l.grad = resize(l.grad, dim) + copy(l.grad, loc.Gradient) + + copy(dir, loc.Gradient) + floats.Scale(-1, dir) + return 1 / floats.Norm(dir, 2) +} + +func (l *LBFGS) initHistory(hist [][]float64) [][]float64 { + c := cap(hist) + if c < l.Store { + n := make([][]float64, l.Store-c) + hist = append(hist[:c], n...) + } + hist = hist[:l.Store] + for i := range hist { + hist[i] = resize(hist[i], l.dim) + for j := range hist[i] { + hist[i][j] = 0 + } + } + return hist +} + +func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) { + // Uses two-loop correction as described in + // Nocedal, J., Wright, S.: Numerical Optimization (2nd ed). Springer (2006), chapter 7, page 178. + + if len(loc.X) != l.dim { + panic("lbfgs: unexpected size mismatch") + } + if len(loc.Gradient) != l.dim { + panic("lbfgs: unexpected size mismatch") + } + if len(dir) != l.dim { + panic("lbfgs: unexpected size mismatch") + } + + y := l.y[l.oldest] + floats.SubTo(y, loc.Gradient, l.grad) + s := l.s[l.oldest] + floats.SubTo(s, loc.X, l.x) + sDotY := floats.Dot(s, y) + l.rho[l.oldest] = 1 / sDotY + + l.oldest = (l.oldest + 1) % l.Store + + copy(l.x, loc.X) + copy(l.grad, loc.Gradient) + copy(dir, loc.Gradient) + + // Start with the most recent element and go backward, + for i := 0; i < l.Store; i++ { + idx := l.oldest - i - 1 + if idx < 0 { + idx += l.Store + } + l.a[idx] = l.rho[idx] * floats.Dot(l.s[idx], dir) + floats.AddScaled(dir, -l.a[idx], l.y[idx]) + } + + // Scale the initial Hessian. + gamma := sDotY / floats.Dot(y, y) + floats.Scale(gamma, dir) + + // Start with the oldest element and go forward. + for i := 0; i < l.Store; i++ { + idx := i + l.oldest + if idx >= l.Store { + idx -= l.Store + } + beta := l.rho[idx] * floats.Dot(l.y[idx], dir) + floats.AddScaled(dir, l.a[idx]-beta, l.s[idx]) + } + + // dir contains H^{-1} * g, so flip the direction for minimization. 
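+ // A unit step is returned: the gamma scaling above leaves quasi-Newton
+ // directions well scaled, so the Linesearcher starts from step 1.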
+ floats.Scale(-1, dir) + + return 1 +} + +func (*LBFGS) needs() struct { + Gradient bool + Hessian bool +} { + return struct { + Gradient bool + Hessian bool + }{true, false} +} diff --git a/vendor/gonum.org/v1/gonum/optimize/linesearch.go b/vendor/gonum.org/v1/gonum/optimize/linesearch.go new file mode 100644 index 0000000000..0fb1dd6ce5 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/linesearch.go @@ -0,0 +1,218 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import ( + "math" + + "gonum.org/v1/gonum/floats" +) + +// LinesearchMethod represents an abstract optimization method in which a +// function is optimized through successive line search optimizations. +type LinesearchMethod struct { + // NextDirectioner specifies the search direction of each linesearch. + NextDirectioner NextDirectioner + // Linesearcher performs a linesearch along the search direction. + Linesearcher Linesearcher + + x []float64 // Starting point for the current iteration. + dir []float64 // Search direction for the current iteration. + + first bool // Indicator of the first iteration. + nextMajor bool // Indicates that MajorIteration must be commanded at the next call to Iterate. + eval Operation // Indicator of valid fields in Location. + + lastStep float64 // Step taken from x in the previous call to Iterate. + lastOp Operation // Operation returned from the previous call to Iterate. +} + +func (ls *LinesearchMethod) Init(loc *Location) (Operation, error) { + if loc.Gradient == nil { + panic("linesearch: gradient is nil") + } + + dim := len(loc.X) + ls.x = resize(ls.x, dim) + ls.dir = resize(ls.dir, dim) + + ls.first = true + ls.nextMajor = false + + // Indicate that all fields of loc are valid. + ls.eval = FuncEvaluation | GradEvaluation + if loc.Hessian != nil { + ls.eval |= HessEvaluation + } + + ls.lastStep = math.NaN() + ls.lastOp = NoOperation + + return ls.initNextLinesearch(loc) +} + +func (ls *LinesearchMethod) Iterate(loc *Location) (Operation, error) { + switch ls.lastOp { + case NoOperation: + // TODO(vladimir-ch): Either Init has not been called, or the caller is + // trying to resume the optimization run after Iterate previously + // returned with an error. Decide what is the proper thing to do. See also #125. + + case MajorIteration: + // The previous updated location did not converge the full + // optimization. Initialize a new Linesearch. + return ls.initNextLinesearch(loc) + + default: + // Update the indicator of valid fields of loc. + ls.eval |= ls.lastOp + + if ls.nextMajor { + ls.nextMajor = false + + // Linesearcher previously finished, and the invalid fields of loc + // have now been validated. Announce MajorIteration. + ls.lastOp = MajorIteration + return ls.lastOp, nil + } + } + + // Continue the linesearch. + + f := math.NaN() + if ls.eval&FuncEvaluation != 0 { + f = loc.F + } + projGrad := math.NaN() + if ls.eval&GradEvaluation != 0 { + projGrad = floats.Dot(loc.Gradient, ls.dir) + } + op, step, err := ls.Linesearcher.Iterate(f, projGrad) + if err != nil { + return ls.error(err) + } + + switch op { + case MajorIteration: + // Linesearch has been finished. + + ls.lastOp = complementEval(loc, ls.eval) + if ls.lastOp == NoOperation { + // loc is complete, MajorIteration can be declared directly. + ls.lastOp = MajorIteration + } else { + // Declare MajorIteration on the next call to Iterate. 
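+ // This defers the announcement until the pending evaluations have
+ // completed, so that loc is fully valid when MajorIteration is seen.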
+ ls.nextMajor = true + } + + case FuncEvaluation, GradEvaluation, FuncEvaluation | GradEvaluation: + if step != ls.lastStep { + // We are moving to a new location, and not, say, evaluating extra + // information at the current location. + + // Compute the next evaluation point and store it in loc.X. + floats.AddScaledTo(loc.X, ls.x, step, ls.dir) + if floats.Equal(ls.x, loc.X) { + // Step size has become so small that the next evaluation point is + // indistinguishable from the starting point for the current + // iteration due to rounding errors. + return ls.error(ErrNoProgress) + } + ls.lastStep = step + ls.eval = NoOperation // Indicate all invalid fields of loc. + } + ls.lastOp = op + + default: + panic("linesearch: Linesearcher returned invalid operation") + } + + return ls.lastOp, nil +} + +func (ls *LinesearchMethod) error(err error) (Operation, error) { + ls.lastOp = NoOperation + return ls.lastOp, err +} + +// initNextLinesearch initializes the next linesearch using the previous +// complete location stored in loc. It fills loc.X and returns an evaluation +// to be performed at loc.X. +func (ls *LinesearchMethod) initNextLinesearch(loc *Location) (Operation, error) { + copy(ls.x, loc.X) + + var step float64 + if ls.first { + ls.first = false + step = ls.NextDirectioner.InitDirection(loc, ls.dir) + } else { + step = ls.NextDirectioner.NextDirection(loc, ls.dir) + } + + projGrad := floats.Dot(loc.Gradient, ls.dir) + if projGrad >= 0 { + return ls.error(ErrNonDescentDirection) + } + + op := ls.Linesearcher.Init(loc.F, projGrad, step) + switch op { + case FuncEvaluation, GradEvaluation, FuncEvaluation | GradEvaluation: + default: + panic("linesearch: Linesearcher returned invalid operation") + } + + floats.AddScaledTo(loc.X, ls.x, step, ls.dir) + if floats.Equal(ls.x, loc.X) { + // Step size is so small that the next evaluation point is + // indistinguishable from the starting point for the current iteration + // due to rounding errors. + return ls.error(ErrNoProgress) + } + + ls.lastStep = step + ls.eval = NoOperation // Invalidate all fields of loc. + + ls.lastOp = op + return ls.lastOp, nil +} + +// ArmijoConditionMet returns true if the Armijo condition (aka sufficient +// decrease) has been met. Under normal conditions, the following should be +// true, though this is not enforced: +// - initGrad < 0 +// - step > 0 +// - 0 < decrease < 1 +func ArmijoConditionMet(currObj, initObj, initGrad, step, decrease float64) bool { + return currObj <= initObj+decrease*step*initGrad +} + +// StrongWolfeConditionsMet returns true if the strong Wolfe conditions have been met. +// The strong Wolfe conditions ensure sufficient decrease in the function +// value, and sufficient decrease in the magnitude of the projected gradient. +// Under normal conditions, the following should be true, though this is not +// enforced: +// - initGrad < 0 +// - step > 0 +// - 0 <= decrease < curvature < 1 +func StrongWolfeConditionsMet(currObj, currGrad, initObj, initGrad, step, decrease, curvature float64) bool { + if currObj > initObj+decrease*step*initGrad { + return false + } + return math.Abs(currGrad) < curvature*math.Abs(initGrad) +} + +// WeakWolfeConditionsMet returns true if the weak Wolfe conditions have been met. +// The weak Wolfe conditions ensure sufficient decrease in the function value, +// and sufficient decrease in the value of the projected gradient. 
Under normal
+// conditions, the following should be true, though this is not enforced:
+// - initGrad < 0
+// - step > 0
+// - 0 <= decrease < curvature < 1
+func WeakWolfeConditionsMet(currObj, currGrad, initObj, initGrad, step, decrease, curvature float64) bool {
+ if currObj > initObj+decrease*step*initGrad {
+ return false
+ }
+ return currGrad >= curvature*initGrad
+}
diff --git a/vendor/gonum.org/v1/gonum/optimize/listsearch.go b/vendor/gonum.org/v1/gonum/optimize/listsearch.go
new file mode 100644
index 0000000000..1771892b79
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/optimize/listsearch.go
@@ -0,0 +1,123 @@
+// Copyright ©2018 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package optimize
+
+import (
+ "math"
+
+ "gonum.org/v1/gonum/mat"
+)
+
+var _ Method = (*ListSearch)(nil)
+
+// ListSearch finds the optimum location from a specified list of possible
+// optimum locations.
+type ListSearch struct {
+ // Locs is the list of locations to optimize. Each row of Locs is a location
+ // to optimize. The number of columns of Locs must match the dimensions
+ // passed to Init, and Locs must have at least one row.
+ Locs mat.Matrix
+
+ eval int
+ rows int
+ bestF float64
+ bestIdx int
+}
+
+func (*ListSearch) Uses(has Available) (uses Available, err error) {
+ return has.function()
+}
+
+// Init initializes the method for optimization. The input dimension
+// must match the number of columns of Locs.
+func (l *ListSearch) Init(dim, tasks int) int {
+ if dim <= 0 {
+ panic(nonpositiveDimension)
+ }
+ if tasks < 0 {
+ panic(negativeTasks)
+ }
+ r, c := l.Locs.Dims()
+ if r == 0 {
+ panic("listsearch: list matrix has no rows")
+ }
+ if c != dim {
+ panic("listsearch: supplied dimension does not match list columns")
+ }
+ l.eval = 0
+ l.rows = r
+ l.bestF = math.Inf(1)
+ l.bestIdx = -1
+ return min(r, tasks)
+}
+
+func (l *ListSearch) sendNewLoc(operation chan<- Task, task Task) {
+ task.Op = FuncEvaluation
+ task.ID = l.eval
+ mat.Row(task.X, l.eval, l.Locs)
+ l.eval++
+ operation <- task
+}
+
+func (l *ListSearch) updateMajor(operation chan<- Task, task Task) {
+ // Update the best value seen so far, and send a MajorIteration.
+ if task.F < l.bestF {
+ l.bestF = task.F
+ l.bestIdx = task.ID
+ } else {
+ task.F = l.bestF
+ mat.Row(task.X, l.bestIdx, l.Locs)
+ }
+ task.Op = MajorIteration
+ operation <- task
+}
+
+func (l *ListSearch) Status() (Status, error) {
+ if l.eval < l.rows {
+ return NotTerminated, nil
+ }
+ return MethodConverge, nil
+}
+
+func (l *ListSearch) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
+ // Send initial tasks to evaluate
+ for _, task := range tasks {
+ l.sendNewLoc(operation, task)
+ }
+ // Read from the channel until PostIteration is sent or until the list of
+ // tasks is exhausted.
+Loop:
+ for {
+ task := <-result
+ switch task.Op {
+ default:
+ panic("unknown operation")
+ case PostIteration:
+ break Loop
+ case MajorIteration:
+ if l.eval == l.rows {
+ task.Op = MethodDone
+ operation <- task
+ continue
+ }
+ l.sendNewLoc(operation, task)
+ case FuncEvaluation:
+ l.updateMajor(operation, task)
+ }
+ }
+
+ // PostIteration was sent, or the list has been completed. Read in the final
+ // list of tasks.
+ for task := range result { + switch task.Op { + default: + panic("unknown operation") + case MajorIteration: + case FuncEvaluation: + l.updateMajor(operation, task) + } + } + close(operation) +} diff --git a/vendor/gonum.org/v1/gonum/optimize/local.go b/vendor/gonum.org/v1/gonum/optimize/local.go new file mode 100644 index 0000000000..27177e7273 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/local.go @@ -0,0 +1,146 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import ( + "math" + + "gonum.org/v1/gonum/floats" +) + +// localOptimizer is a helper type for running an optimization using a LocalMethod. +type localOptimizer struct{} + +// run controls the optimization run for a localMethod. The calling method +// must close the operation channel at the conclusion of the optimization. This +// provides a happens before relationship between the return of status and the +// closure of operation, and thus a call to method.Status (if necessary). +func (l localOptimizer) run(method localMethod, gradThresh float64, operation chan<- Task, result <-chan Task, tasks []Task) (Status, error) { + // Local methods start with a fully-specified initial location. + task := tasks[0] + task = l.initialLocation(operation, result, task, method) + if task.Op == PostIteration { + l.finish(operation, result) + return NotTerminated, nil + } + status, err := l.checkStartingLocation(task, gradThresh) + if err != nil { + l.finishMethodDone(operation, result, task) + return status, err + } + + // Send a major iteration with the starting location. + task.Op = MajorIteration + operation <- task + task = <-result + if task.Op == PostIteration { + l.finish(operation, result) + return NotTerminated, nil + } + op, err := method.initLocal(task.Location) + if err != nil { + l.finishMethodDone(operation, result, task) + return Failure, err + } + task.Op = op + operation <- task +Loop: + for { + r := <-result + switch r.Op { + case PostIteration: + break Loop + case MajorIteration: + // The last operation was a MajorIteration. Check if the gradient + // is below the threshold. + if status := l.checkGradientConvergence(r.Gradient, gradThresh); status != NotTerminated { + l.finishMethodDone(operation, result, task) + return GradientThreshold, nil + } + fallthrough + default: + op, err := method.iterateLocal(r.Location) + if err != nil { + l.finishMethodDone(operation, result, r) + return Failure, err + } + r.Op = op + operation <- r + } + } + l.finish(operation, result) + return NotTerminated, nil +} + +// initialOperation returns the Operation needed to fill the initial location +// based on the needs of the method and the values already supplied. +func (localOptimizer) initialOperation(task Task, n needser) Operation { + var newOp Operation + op := task.Op + if op&FuncEvaluation == 0 { + newOp |= FuncEvaluation + } + needs := n.needs() + if needs.Gradient && op&GradEvaluation == 0 { + newOp |= GradEvaluation + } + if needs.Hessian && op&HessEvaluation == 0 { + newOp |= HessEvaluation + } + return newOp +} + +// initialLocation fills the initial location based on the needs of the method. +// The task passed to initialLocation should be the first task sent in RunGlobal. 
+func (l localOptimizer) initialLocation(operation chan<- Task, result <-chan Task, task Task, needs needser) Task { + task.Op = l.initialOperation(task, needs) + operation <- task + return <-result +} + +func (l localOptimizer) checkStartingLocation(task Task, gradThresh float64) (Status, error) { + if math.IsInf(task.F, 1) || math.IsNaN(task.F) { + return Failure, ErrFunc(task.F) + } + for i, v := range task.Gradient { + if math.IsInf(v, 0) || math.IsNaN(v) { + return Failure, ErrGrad{Grad: v, Index: i} + } + } + status := l.checkGradientConvergence(task.Gradient, gradThresh) + return status, nil +} + +func (localOptimizer) checkGradientConvergence(gradient []float64, gradThresh float64) Status { + if gradient == nil || math.IsNaN(gradThresh) { + return NotTerminated + } + if gradThresh == 0 { + gradThresh = defaultGradientAbsTol + } + if norm := floats.Norm(gradient, math.Inf(1)); norm < gradThresh { + return GradientThreshold + } + return NotTerminated +} + +// finish completes the channel operations to finish an optimization. +func (localOptimizer) finish(operation chan<- Task, result <-chan Task) { + // Guarantee that result is closed before operation is closed. + for range result { + } +} + +// finishMethodDone sends a MethodDone signal on operation, reads the result, +// and completes the channel operations to finish an optimization. +func (l localOptimizer) finishMethodDone(operation chan<- Task, result <-chan Task, task Task) { + task.Op = MethodDone + operation <- task + task = <-result + if task.Op != PostIteration { + panic("optimize: task should have returned post iteration") + } + l.finish(operation, result) +} diff --git a/vendor/gonum.org/v1/gonum/optimize/minimize.go b/vendor/gonum.org/v1/gonum/optimize/minimize.go new file mode 100644 index 0000000000..ea962f258f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/minimize.go @@ -0,0 +1,595 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import ( + "fmt" + "math" + "time" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/mat" +) + +const ( + nonpositiveDimension string = "optimize: non-positive input dimension" + negativeTasks string = "optimize: negative input number of tasks" +) + +// Task is a type to communicate between the Method and the outer +// calling script. +type Task struct { + ID int + Op Operation + *Location +} + +// Location represents a location in the optimization procedure. +type Location struct { + // X is the function input for the location. + X []float64 + // F is the result of evaluating the function at X. + F float64 + // Gradient holds the first-order partial derivatives + // of the function at X. + // The length of Gradient must match the length of X + // or be zero. If the capacity of Gradient is less + // than the length of X, a new slice will be allocated. + Gradient []float64 + // Hessian holds the second-order partial derivatives + // of the function at X. + // The dimensions of Hessian must match the length of X + // or Hessian must be nil or empty. If Hessian is nil + // a new mat.SymDense will be allocated, if it is empty + // it will be resized to match the length of X. + Hessian *mat.SymDense +} + +// Method is a type which can search for an optimum of an objective function. +type Method interface { + // Init initializes the method for optimization. The inputs are + // the problem dimension and number of available concurrent tasks. 
+	//
+	// Init returns the number of concurrent processes to use, which must be
+	// less than or equal to tasks.
+	Init(dim, tasks int) (concurrent int)
+	// Run runs an optimization. The method sends Tasks on
+	// the operation channel (for performing function evaluations, major
+	// iterations, etc.). The result of the tasks will be returned on result.
+	// See the documentation for Operation types for the possible operations.
+	//
+	// The caller of Run will signal the termination of the optimization
+	// (i.e. convergence from user settings) by sending a task with a PostIteration
+	// Op field on result. More tasks may still be sent on operation after this
+	// occurs, but only MajorIteration operations will still be conducted
+	// appropriately. Thus, it cannot be guaranteed that all Evaluations sent
+	// on operation will be evaluated; however, if an Evaluation is started,
+	// the results of that evaluation will be sent on result.
+	//
+	// The Method must read from the result channel until it is closed.
+	// During this, the Method may want to send new MajorIteration(s) on
+	// operation. The Method must then close operation and return from Run.
+	// These steps must establish a "happens-before" relationship between result
+	// being closed (externally) and Run closing operation, for example
+	// by using a range loop to read from result even if no results are expected.
+	//
+	// The last parameter to Run is a slice of tasks with length equal to
+	// the return from Init. Task has an ID field which may be
+	// set and modified by Method, and must not be modified by the caller.
+	// The first element of tasks contains information about the initial location.
+	// The Location.X field is always valid. The Operation field specifies which
+	// other values of Location are known. If Operation == NoOperation, none of
+	// the values should be used, otherwise the Evaluation operations will be
+	// composed to specify the valid fields. Methods are free to use or
+	// ignore these values.
+	//
+	// Successful execution of an Operation may require the Method to modify
+	// fields of a Location. MajorIteration calls will not modify the values in
+	// the Location, but Evaluation operations will. Methods are encouraged to
+	// leave Location fields untouched to allow memory re-use. If data needs to
+	// be stored, the respective field should be set to nil -- Methods should
+	// not allocate Location memory themselves.
+	//
+	// Method may have its own specific convergence criteria, which can
+	// be communicated using a MethodDone operation. This will trigger a
+	// PostIteration to be sent on result, and the MethodDone task will not be
+	// returned on result. The Method must implement Statuser, and the
+	// call to Status must return a Status other than NotTerminated.
+	//
+	// The operation and result tasks are guaranteed to have a buffer length
+	// equal to the return from Init.
+	Run(operation chan<- Task, result <-chan Task, tasks []Task)
+	// Uses checks if the Method is suited to the optimization problem. The
+	// input is the available functions in Problem to call, and the returns are
+	// the functions which may be used and an error if there is a mismatch
+	// between the Problem and the Method's capabilities.
+	Uses(has Available) (uses Available, err error)
+}
+
+// Minimize uses an optimizer to search for a minimum of a function. A
+// maximization problem can be transformed into a minimization problem by
+// multiplying the function by -1.
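+//
+// For example, to maximize a function f (an illustrative sketch; f and x0 are
+// assumed to be supplied by the caller):
+//
+//	p := Problem{Func: func(x []float64) float64 { return -f(x) }}
+//	res, err := Minimize(p, x0, nil, nil)
+//	// The maximizer is res.X and the maximum value is -res.F.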
+//
+// The first argument represents the problem to be minimized. Its fields are
+// routines that evaluate the objective function, gradient, and other
+// quantities related to the problem. The objective function, p.Func, must not
+// be nil. The optimization method used may require other fields to be non-nil,
+// as reported by the method's Uses; Minimize will panic if these are not met.
+// The method can be determined automatically from the supplied problem, as
+// described below.
+//
+// If p.Status is not nil, it is called before every evaluation. If the
+// returned Status is other than NotTerminated or if the error is not nil, the
+// optimization run is terminated.
+//
+// The second argument specifies the initial location for the optimization.
+// Some Methods do not require an initial location, but initX must still be
+// specified for the dimension of the optimization problem.
+//
+// The third argument contains the settings for the minimization. If settings
+// is nil, the zero value will be used; see the documentation of the Settings
+// type for more information, and see the warning below. All settings will be
+// honored for all Methods, even if that setting is counter-productive to the
+// method. Minimize cannot guarantee strict adherence to the evaluation bounds
+// specified when performing concurrent evaluations and updates.
+//
+// The final argument is the optimization method to use. If method == nil, then
+// an appropriate default is chosen based on the properties of the other arguments
+// (dimension, gradient-free or gradient-based, etc.). If method is not nil,
+// Minimize panics if the Problem is not consistent with the Method (Uses
+// returns an error).
+//
+// Minimize returns a Result struct and any error that occurred. See the
+// documentation of Result for more information.
+//
+// See the documentation for Method for the details on implementing a method.
+//
+// Be aware that the default settings of Minimize are to accurately find the
+// minimum. For certain functions and optimization methods, this can take many
+// function evaluations. The Settings input struct can be used to limit this,
+// for example by modifying the maximum function evaluations or gradient tolerance.
+func Minimize(p Problem, initX []float64, settings *Settings, method Method) (*Result, error) {
+	startTime := time.Now()
+	if method == nil {
+		method = getDefaultMethod(&p)
+	}
+	if settings == nil {
+		settings = &Settings{}
+	}
+	stats := &Stats{}
+	dim := len(initX)
+	err := checkOptimization(p, dim, settings.Recorder)
+	if err != nil {
+		return nil, err
+	}
+
+	optLoc := newLocation(dim) // This must have an allocated X field.
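+	// Start the reported optimum at +Inf until the first MajorIteration
+	// overwrites it.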
+ optLoc.F = math.Inf(1) + + initOp, initLoc := getInitLocation(dim, initX, settings.InitValues) + + converger := settings.Converger + if converger == nil { + converger = defaultFunctionConverge() + } + converger.Init(dim) + + stats.Runtime = time.Since(startTime) + + // Send initial location to Recorder + if settings.Recorder != nil { + err = settings.Recorder.Record(optLoc, InitIteration, stats) + if err != nil { + return nil, err + } + } + + // Run optimization + var status Status + status, err = minimize(&p, method, settings, converger, stats, initOp, initLoc, optLoc, startTime) + + // Cleanup and collect results + if settings.Recorder != nil && err == nil { + err = settings.Recorder.Record(optLoc, PostIteration, stats) + } + stats.Runtime = time.Since(startTime) + return &Result{ + Location: *optLoc, + Stats: *stats, + Status: status, + }, err +} + +func getDefaultMethod(p *Problem) Method { + if p.Grad != nil { + return &LBFGS{} + } + return &NelderMead{} +} + +// minimize performs an optimization. minimize updates the settings and optLoc, +// and returns the final Status and error. +func minimize(prob *Problem, method Method, settings *Settings, converger Converger, stats *Stats, initOp Operation, initLoc, optLoc *Location, startTime time.Time) (Status, error) { + dim := len(optLoc.X) + nTasks := settings.Concurrent + if nTasks == 0 { + nTasks = 1 + } + has := availFromProblem(*prob) + _, initErr := method.Uses(has) + if initErr != nil { + panic(fmt.Sprintf("optimize: specified method inconsistent with Problem: %v", initErr)) + } + newNTasks := method.Init(dim, nTasks) + if newNTasks > nTasks { + panic("optimize: too many tasks returned by Method") + } + nTasks = newNTasks + + // Launch the method. The method communicates tasks using the operations + // channel, and results is used to return the evaluated results. + operations := make(chan Task, nTasks) + results := make(chan Task, nTasks) + go func() { + tasks := make([]Task, nTasks) + tasks[0].Location = initLoc + tasks[0].Op = initOp + for i := 1; i < len(tasks); i++ { + tasks[i].Location = newLocation(dim) + } + method.Run(operations, results, tasks) + }() + + // Algorithmic Overview: + // There are three pieces to performing a concurrent optimization, + // the distributor, the workers, and the stats combiner. At a high level, + // the distributor reads in tasks sent by method, sending evaluations to the + // workers, and forwarding other operations to the statsCombiner. The workers + // read these forwarded evaluation tasks, evaluate the relevant parts of Problem + // and forward the results on to the stats combiner. The stats combiner reads + // in results from the workers, as well as tasks from the distributor, and + // uses them to update optimization statistics (function evaluations, etc.) + // and to check optimization convergence. + // + // The complicated part is correctly shutting down the optimization. The + // procedure is as follows. First, the stats combiner closes done and sends + // a PostIteration to the method. The distributor then reads that done has + // been closed, and closes the channel with the workers. At this point, no + // more evaluation operations will be executed. As the workers finish their + // evaluations, they forward the results onto the stats combiner, and then + // signal their shutdown to the stats combiner. When all workers have successfully + // finished, the stats combiner closes the results channel, signaling to the + // method that all results have been collected. 
At this point, the method + // may send MajorIteration(s) to update an optimum location based on these + // last returned results, and then the method will close the operations channel. + // The Method must ensure that the closing of results happens before the + // closing of operations in order to ensure proper shutdown order. + // Now that no more tasks will be commanded by the method, the distributor + // closes statsChan, and with no more statistics to update the optimization + // concludes. + + workerChan := make(chan Task) // Delegate tasks to the workers. + statsChan := make(chan Task) // Send evaluation updates. + done := make(chan struct{}) // Communicate the optimization is done. + + // Read tasks from the method and distribute as appropriate. + distributor := func() { + for { + select { + case task := <-operations: + switch task.Op { + case InitIteration: + panic("optimize: Method returned InitIteration") + case PostIteration: + panic("optimize: Method returned PostIteration") + case NoOperation, MajorIteration, MethodDone: + statsChan <- task + default: + if !task.Op.isEvaluation() { + panic("optimize: expecting evaluation operation") + } + workerChan <- task + } + case <-done: + // No more evaluations will be sent, shut down the workers, and + // read the final tasks. + close(workerChan) + for task := range operations { + if task.Op == MajorIteration { + statsChan <- task + } + } + close(statsChan) + return + } + } + } + go distributor() + + // Evaluate the Problem concurrently. + worker := func() { + x := make([]float64, dim) + for task := range workerChan { + evaluate(prob, task.Location, task.Op, x) + statsChan <- task + } + // Signal successful worker completion. + statsChan <- Task{Op: signalDone} + } + for i := 0; i < nTasks; i++ { + go worker() + } + + var ( + workersDone int // effective wg for the workers + status Status + err error + finalStatus Status + finalError error + ) + + // Update optimization statistics and check convergence. + var methodDone bool + for task := range statsChan { + switch task.Op { + default: + if !task.Op.isEvaluation() { + panic("minimize: evaluation task expected") + } + updateEvaluationStats(stats, task.Op) + status, err = checkEvaluationLimits(prob, stats, settings) + case signalDone: + workersDone++ + if workersDone == nTasks { + close(results) + } + continue + case NoOperation: + // Just send the task back. + case MajorIteration: + status = performMajorIteration(optLoc, task.Location, stats, converger, startTime, settings) + case MethodDone: + methodDone = true + status = MethodConverge + } + if settings.Recorder != nil && status == NotTerminated && err == nil { + stats.Runtime = time.Since(startTime) + // Allow err to be overloaded if the Recorder fails. + err = settings.Recorder.Record(task.Location, task.Op, stats) + if err != nil { + status = Failure + } + } + // If this is the first termination status, trigger the conclusion of + // the optimization. + if status != NotTerminated || err != nil { + select { + case <-done: + default: + finalStatus = status + finalError = err + results <- Task{ + Op: PostIteration, + } + close(done) + } + } + + // Send the result back to the Problem if there are still active workers. + if workersDone != nTasks && task.Op != MethodDone { + results <- task + } + } + // This code block is here rather than above to ensure Status() is not called + // before Method.Run closes operations. 
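+	// (The loop over statsChan above only ends after the distributor has
+	// drained operations, which in turn happens only once Method.Run has
+	// closed it.)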
+ if methodDone { + statuser, ok := method.(Statuser) + if !ok { + panic("optimize: method returned MethodDone but is not a Statuser") + } + finalStatus, finalError = statuser.Status() + if finalStatus == NotTerminated { + panic("optimize: method returned MethodDone but a NotTerminated status") + } + } + return finalStatus, finalError +} + +func defaultFunctionConverge() *FunctionConverge { + return &FunctionConverge{ + Absolute: 1e-10, + Iterations: 100, + } +} + +// newLocation allocates a new location structure with an X field of the +// appropriate size. +func newLocation(dim int) *Location { + return &Location{ + X: make([]float64, dim), + } +} + +// getInitLocation checks the validity of initLocation and initOperation and +// returns the initial values as a *Location. +func getInitLocation(dim int, initX []float64, initValues *Location) (Operation, *Location) { + loc := newLocation(dim) + if initX == nil { + if initValues != nil { + panic("optimize: initValues is non-nil but no initial location specified") + } + return NoOperation, loc + } + copy(loc.X, initX) + if initValues == nil { + return NoOperation, loc + } else { + if initValues.X != nil { + panic("optimize: location specified in InitValues (only use InitX)") + } + } + loc.F = initValues.F + op := FuncEvaluation + if initValues.Gradient != nil { + if len(initValues.Gradient) != dim { + panic("optimize: initial gradient does not match problem dimension") + } + loc.Gradient = initValues.Gradient + op |= GradEvaluation + } + if initValues.Hessian != nil { + if initValues.Hessian.SymmetricDim() != dim { + panic("optimize: initial Hessian does not match problem dimension") + } + loc.Hessian = initValues.Hessian + op |= HessEvaluation + } + return op, loc +} + +func checkOptimization(p Problem, dim int, recorder Recorder) error { + if p.Func == nil { + panic(badProblem) + } + if dim <= 0 { + panic("optimize: impossible problem dimension") + } + if p.Status != nil { + _, err := p.Status() + if err != nil { + return err + } + } + if recorder != nil { + err := recorder.Init() + if err != nil { + return err + } + } + return nil +} + +// evaluate evaluates the routines specified by the Operation at loc.X, and stores +// the answer into loc. loc.X is copied into x before evaluating in order to +// prevent the routines from modifying it. +func evaluate(p *Problem, loc *Location, op Operation, x []float64) { + if !op.isEvaluation() { + panic(fmt.Sprintf("optimize: invalid evaluation %v", op)) + } + copy(x, loc.X) + if op&FuncEvaluation != 0 { + loc.F = p.Func(x) + } + if op&GradEvaluation != 0 { + // Make sure we have a destination in which to place the gradient. + if len(loc.Gradient) == 0 { + if cap(loc.Gradient) < len(x) { + loc.Gradient = make([]float64, len(x)) + } else { + loc.Gradient = loc.Gradient[:len(x)] + } + } + p.Grad(loc.Gradient, x) + } + if op&HessEvaluation != 0 { + // Make sure we have a destination in which to place the Hessian. + switch { + case loc.Hessian == nil: + loc.Hessian = mat.NewSymDense(len(x), nil) + case loc.Hessian.IsEmpty(): + loc.Hessian.ReuseAsSym(len(x)) + } + p.Hess(loc.Hessian, x) + } +} + +// updateEvaluationStats updates the statistics based on the operation. 
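+// Operation is a bit set, so a compound evaluation such as
+// FuncEvaluation|GradEvaluation increments both counters.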
+func updateEvaluationStats(stats *Stats, op Operation) { + if op&FuncEvaluation != 0 { + stats.FuncEvaluations++ + } + if op&GradEvaluation != 0 { + stats.GradEvaluations++ + } + if op&HessEvaluation != 0 { + stats.HessEvaluations++ + } +} + +// checkLocationConvergence checks if the current optimal location satisfies +// any of the convergence criteria based on the function location. +// +// checkLocationConvergence returns NotTerminated if the Location does not satisfy +// the convergence criteria given by settings. Otherwise a corresponding status is +// returned. +// Unlike checkLimits, checkConvergence is called only at MajorIterations. +func checkLocationConvergence(loc *Location, settings *Settings, converger Converger) Status { + if math.IsInf(loc.F, -1) { + return FunctionNegativeInfinity + } + if loc.Gradient != nil && settings.GradientThreshold > 0 { + norm := floats.Norm(loc.Gradient, math.Inf(1)) + if norm < settings.GradientThreshold { + return GradientThreshold + } + } + return converger.Converged(loc) +} + +// checkEvaluationLimits checks the optimization limits after an evaluation +// Operation. It checks the number of evaluations (of various kinds) and checks +// the status of the Problem, if applicable. +func checkEvaluationLimits(p *Problem, stats *Stats, settings *Settings) (Status, error) { + if p.Status != nil { + status, err := p.Status() + if err != nil || status != NotTerminated { + return status, err + } + } + if settings.FuncEvaluations > 0 && stats.FuncEvaluations >= settings.FuncEvaluations { + return FunctionEvaluationLimit, nil + } + if settings.GradEvaluations > 0 && stats.GradEvaluations >= settings.GradEvaluations { + return GradientEvaluationLimit, nil + } + if settings.HessEvaluations > 0 && stats.HessEvaluations >= settings.HessEvaluations { + return HessianEvaluationLimit, nil + } + return NotTerminated, nil +} + +// checkIterationLimits checks the limits on iterations affected by MajorIteration. +func checkIterationLimits(loc *Location, stats *Stats, settings *Settings) Status { + if settings.MajorIterations > 0 && stats.MajorIterations >= settings.MajorIterations { + return IterationLimit + } + if settings.Runtime > 0 && stats.Runtime >= settings.Runtime { + return RuntimeLimit + } + return NotTerminated +} + +// performMajorIteration does all of the steps needed to perform a MajorIteration. +// It increments the iteration count, updates the optimal location, and checks +// the necessary convergence criteria. +func performMajorIteration(optLoc, loc *Location, stats *Stats, converger Converger, startTime time.Time, settings *Settings) Status { + optLoc.F = loc.F + copy(optLoc.X, loc.X) + if loc.Gradient == nil { + optLoc.Gradient = nil + } else { + if optLoc.Gradient == nil { + optLoc.Gradient = make([]float64, len(loc.Gradient)) + } + copy(optLoc.Gradient, loc.Gradient) + } + stats.MajorIterations++ + stats.Runtime = time.Since(startTime) + status := checkLocationConvergence(optLoc, settings, converger) + if status != NotTerminated { + return status + } + return checkIterationLimits(optLoc, stats, settings) +} diff --git a/vendor/gonum.org/v1/gonum/optimize/morethuente.go b/vendor/gonum.org/v1/gonum/optimize/morethuente.go new file mode 100644 index 0000000000..cb23890ca1 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/morethuente.go @@ -0,0 +1,387 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package optimize + +import "math" + +var _ Linesearcher = (*MoreThuente)(nil) + +// MoreThuente is a Linesearcher that finds steps that satisfy both the +// sufficient decrease and curvature conditions (the strong Wolfe conditions). +// +// References: +// - More, J.J. and D.J. Thuente: Line Search Algorithms with Guaranteed Sufficient +// Decrease. ACM Transactions on Mathematical Software 20(3) (1994), 286-307 +type MoreThuente struct { + // DecreaseFactor is the constant factor in the sufficient decrease + // (Armijo) condition. + // It must be in the interval [0, 1). The default value is 0. + DecreaseFactor float64 + // CurvatureFactor is the constant factor in the Wolfe conditions. Smaller + // values result in a more exact line search. + // A set value must be in the interval (0, 1). If it is zero, it will be + // defaulted to 0.9. + CurvatureFactor float64 + // StepTolerance sets the minimum acceptable width for the linesearch + // interval. If the relative interval length is less than this value, + // ErrLinesearcherFailure is returned. + // It must be non-negative. If it is zero, it will be defaulted to 1e-10. + StepTolerance float64 + + // MinimumStep is the minimum step that the linesearcher will take. + // It must be non-negative and less than MaximumStep. Defaults to no + // minimum (a value of 0). + MinimumStep float64 + // MaximumStep is the maximum step that the linesearcher will take. + // It must be greater than MinimumStep. If it is zero, it will be defaulted + // to 1e20. + MaximumStep float64 + + bracketed bool // Indicates if a minimum has been bracketed. + fInit float64 // Function value at step = 0. + gInit float64 // Derivative value at step = 0. + + // When stage is 1, the algorithm updates the interval given by x and y + // so that it contains a minimizer of the modified function + // psi(step) = f(step) - f(0) - DecreaseFactor * step * f'(0). + // When stage is 2, the interval is updated so that it contains a minimizer + // of f. + stage int + + step float64 // Current step. + lower, upper float64 // Lower and upper bounds on the next step. + x float64 // Endpoint of the interval with a lower function value. + fx, gx float64 // Data at x. + y float64 // The other endpoint. + fy, gy float64 // Data at y. + width [2]float64 // Width of the interval at two previous iterations. +} + +const ( + mtMinGrowthFactor float64 = 1.1 + mtMaxGrowthFactor float64 = 4 +) + +func (mt *MoreThuente) Init(f, g float64, step float64) Operation { + // Based on the original Fortran code that is available, for example, from + // http://ftp.mcs.anl.gov/pub/MINPACK-2/csrch/ + // as part of + // MINPACK-2 Project. November 1993. + // Argonne National Laboratory and University of Minnesota. + // Brett M. Averick, Richard G. Carter, and Jorge J. Moré. 
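+	//
+	// In the notation used here, Iterate accepts a step once both strong
+	// Wolfe conditions hold:
+	//
+	//	f(step) <= f(0) + DecreaseFactor*step*f'(0)
+	//	|f'(step)| <= CurvatureFactor*|f'(0)|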
+
+	if g >= 0 {
+		panic("morethuente: initial derivative is non-negative")
+	}
+	if step <= 0 {
+		panic("morethuente: invalid initial step")
+	}
+
+	if mt.CurvatureFactor == 0 {
+		mt.CurvatureFactor = 0.9
+	}
+	if mt.StepTolerance == 0 {
+		mt.StepTolerance = 1e-10
+	}
+	if mt.MaximumStep == 0 {
+		mt.MaximumStep = 1e20
+	}
+
+	if mt.MinimumStep < 0 {
+		panic("morethuente: minimum step is negative")
+	}
+	if mt.MaximumStep <= mt.MinimumStep {
+		panic("morethuente: maximum step is not greater than minimum step")
+	}
+	if mt.DecreaseFactor < 0 || mt.DecreaseFactor >= 1 {
+		panic("morethuente: invalid decrease factor")
+	}
+	if mt.CurvatureFactor <= 0 || mt.CurvatureFactor >= 1 {
+		panic("morethuente: invalid curvature factor")
+	}
+	if mt.StepTolerance <= 0 {
+		panic("morethuente: step tolerance is not positive")
+	}
+
+	if step < mt.MinimumStep {
+		step = mt.MinimumStep
+	}
+	if step > mt.MaximumStep {
+		step = mt.MaximumStep
+	}
+
+	mt.bracketed = false
+	mt.stage = 1
+	mt.fInit = f
+	mt.gInit = g
+
+	mt.x, mt.fx, mt.gx = 0, f, g
+	mt.y, mt.fy, mt.gy = 0, f, g
+
+	mt.lower = 0
+	mt.upper = step + mtMaxGrowthFactor*step
+
+	mt.width[0] = mt.MaximumStep - mt.MinimumStep
+	mt.width[1] = 2 * mt.width[0]
+
+	mt.step = step
+	return FuncEvaluation | GradEvaluation
+}
+
+func (mt *MoreThuente) Iterate(f, g float64) (Operation, float64, error) {
+	if mt.stage == 0 {
+		panic("morethuente: Init has not been called")
+	}
+
+	gTest := mt.DecreaseFactor * mt.gInit
+	fTest := mt.fInit + mt.step*gTest
+
+	if mt.bracketed {
+		if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper {
+			// step contains the best step found (see below).
+			return NoOperation, mt.step, ErrLinesearcherFailure
+		}
+	}
+	if mt.step == mt.MaximumStep && f <= fTest && g <= gTest {
+		return NoOperation, mt.step, ErrLinesearcherBound
+	}
+	if mt.step == mt.MinimumStep && (f > fTest || g >= gTest) {
+		return NoOperation, mt.step, ErrLinesearcherFailure
+	}
+
+	// Test for convergence.
+	if f <= fTest && math.Abs(g) <= mt.CurvatureFactor*(-mt.gInit) {
+		mt.stage = 0
+		return MajorIteration, mt.step, nil
+	}
+
+	if mt.stage == 1 && f <= fTest && g >= 0 {
+		mt.stage = 2
+	}
+
+	if mt.stage == 1 && f <= mt.fx && f > fTest {
+		// Lower function value but the decrease is not sufficient.
+
+		// Compute values and derivatives of the modified function at step, x, y.
+		fm := f - mt.step*gTest
+		fxm := mt.fx - mt.x*gTest
+		fym := mt.fy - mt.y*gTest
+		gm := g - gTest
+		gxm := mt.gx - gTest
+		gym := mt.gy - gTest
+		// Update x, y and step.
+		mt.nextStep(fxm, gxm, fym, gym, fm, gm)
+		// Recover values and derivatives of the non-modified function at x and y.
+		mt.fx = fxm + mt.x*gTest
+		mt.fy = fym + mt.y*gTest
+		mt.gx = gxm + gTest
+		mt.gy = gym + gTest
+	} else {
+		// Update x, y and step.
+		mt.nextStep(mt.fx, mt.gx, mt.fy, mt.gy, f, g)
+	}
+
+	if mt.bracketed {
+		// Monitor the length of the bracketing interval. If the interval has
+		// not been reduced sufficiently after two steps, use bisection to
+		// force its length to zero.
+		width := mt.y - mt.x
+		if math.Abs(width) >= 2.0/3*mt.width[1] {
+			mt.step = mt.x + 0.5*width
+		}
+		mt.width[0], mt.width[1] = math.Abs(width), mt.width[0]
+	}
+
+	if mt.bracketed {
+		mt.lower = math.Min(mt.x, mt.y)
+		mt.upper = math.Max(mt.x, mt.y)
+	} else {
+		mt.lower = mt.step + mtMinGrowthFactor*(mt.step-mt.x)
+		mt.upper = mt.step + mtMaxGrowthFactor*(mt.step-mt.x)
+	}
+
+	// Force the step to be in [MinimumStep, MaximumStep].
+ mt.step = math.Max(mt.MinimumStep, math.Min(mt.step, mt.MaximumStep)) + + if mt.bracketed { + if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper { + // If further progress is not possible, set step to the best step + // obtained during the search. + mt.step = mt.x + } + } + + return FuncEvaluation | GradEvaluation, mt.step, nil +} + +// nextStep computes the next safeguarded step and updates the interval that +// contains a step that satisfies the sufficient decrease and curvature +// conditions. +func (mt *MoreThuente) nextStep(fx, gx, fy, gy, f, g float64) { + x := mt.x + y := mt.y + step := mt.step + + gNeg := g < 0 + if gx < 0 { + gNeg = !gNeg + } + + var next float64 + var bracketed bool + switch { + case f > fx: + // A higher function value. The minimum is bracketed between x and step. + // We want the next step to be closer to x because the function value + // there is lower. + + theta := 3*(fx-f)/(step-x) + gx + g + s := math.Max(math.Abs(gx), math.Abs(g)) + s = math.Max(s, math.Abs(theta)) + gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s)) + if step < x { + gamma *= -1 + } + p := gamma - gx + theta + q := gamma - gx + gamma + g + r := p / q + stpc := x + r*(step-x) + stpq := x + gx/((fx-f)/(step-x)+gx)/2*(step-x) + + if math.Abs(stpc-x) < math.Abs(stpq-x) { + // The cubic step is closer to x than the quadratic step. + // Take the cubic step. + next = stpc + } else { + // If f is much larger than fx, then the quadratic step may be too + // close to x. Therefore heuristically take the average of the + // cubic and quadratic steps. + next = stpc + (stpq-stpc)/2 + } + bracketed = true + + case gNeg: + // A lower function value and derivatives of opposite sign. The minimum + // is bracketed between x and step. If we choose a step that is far + // from step, the next iteration will also likely fall in this case. + + theta := 3*(fx-f)/(step-x) + gx + g + s := math.Max(math.Abs(gx), math.Abs(g)) + s = math.Max(s, math.Abs(theta)) + gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s)) + if step > x { + gamma *= -1 + } + p := gamma - g + theta + q := gamma - g + gamma + gx + r := p / q + stpc := step + r*(x-step) + stpq := step + g/(g-gx)*(x-step) + + if math.Abs(stpc-step) > math.Abs(stpq-step) { + // The cubic step is farther from x than the quadratic step. + // Take the cubic step. + next = stpc + } else { + // Take the quadratic step. + next = stpq + } + bracketed = true + + case math.Abs(g) < math.Abs(gx): + // A lower function value, derivatives of the same sign, and the + // magnitude of the derivative decreases. Extrapolate function values + // at x and step so that the next step lies between step and y. + + theta := 3*(fx-f)/(step-x) + gx + g + s := math.Max(math.Abs(gx), math.Abs(g)) + s = math.Max(s, math.Abs(theta)) + gamma := s * math.Sqrt(math.Max(0, (theta/s)*(theta/s)-(gx/s)*(g/s))) + if step > x { + gamma *= -1 + } + p := gamma - g + theta + q := gamma + gx - g + gamma + r := p / q + var stpc float64 + switch { + case r < 0 && gamma != 0: + stpc = step + r*(x-step) + case step > x: + stpc = mt.upper + default: + stpc = mt.lower + } + stpq := step + g/(g-gx)*(x-step) + + if mt.bracketed { + // We are extrapolating so be cautious and take the step that + // is closer to step. + if math.Abs(stpc-step) < math.Abs(stpq-step) { + next = stpc + } else { + next = stpq + } + // Modify next if it is close to or beyond y. 
+			if step > x {
+				next = math.Min(step+2.0/3*(y-step), next)
+			} else {
+				next = math.Max(step+2.0/3*(y-step), next)
+			}
+		} else {
+			// Minimum has not been bracketed so take the larger step...
+			if math.Abs(stpc-step) > math.Abs(stpq-step) {
+				next = stpc
+			} else {
+				next = stpq
+			}
+			// ...but within reason.
+			next = math.Max(mt.lower, math.Min(next, mt.upper))
+		}
+
+	default:
+		// A lower function value, derivatives of the same sign, and the
+		// magnitude of the derivative does not decrease. The function seems to
+		// decrease rapidly in the direction of the step.
+
+		switch {
+		case mt.bracketed:
+			theta := 3*(f-fy)/(y-step) + gy + g
+			s := math.Max(math.Abs(gy), math.Abs(g))
+			s = math.Max(s, math.Abs(theta))
+			gamma := s * math.Sqrt((theta/s)*(theta/s)-(gy/s)*(g/s))
+			if step > y {
+				gamma *= -1
+			}
+			p := gamma - g + theta
+			q := gamma - g + gamma + gy
+			r := p / q
+			next = step + r*(y-step)
+		case step > x:
+			next = mt.upper
+		default:
+			next = mt.lower
+		}
+	}
+
+	if f > fx {
+		// x is still the best step.
+		mt.y = step
+		mt.fy = f
+		mt.gy = g
+	} else {
+		// step is the new best step.
+		if gNeg {
+			mt.y = x
+			mt.fy = fx
+			mt.gy = gx
+		}
+		mt.x = step
+		mt.fx = f
+		mt.gx = g
+	}
+	mt.bracketed = bracketed
+	mt.step = next
+}
diff --git a/vendor/gonum.org/v1/gonum/optimize/neldermead.go b/vendor/gonum.org/v1/gonum/optimize/neldermead.go
new file mode 100644
index 0000000000..5118fd4cd5
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/optimize/neldermead.go
@@ -0,0 +1,348 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package optimize
+
+import (
+	"math"
+	"sort"
+
+	"gonum.org/v1/gonum/floats"
+)
+
+// nmIterType is a Nelder-Mead evaluation kind.
+type nmIterType int
+
+const (
+	nmReflected = iota
+	nmExpanded
+	nmContractedInside
+	nmContractedOutside
+	nmInitialize
+	nmShrink
+	nmMajor
+)
+
+type nmVertexSorter struct {
+	vertices [][]float64
+	values   []float64
+}
+
+func (n nmVertexSorter) Len() int {
+	return len(n.values)
+}
+
+func (n nmVertexSorter) Less(i, j int) bool {
+	return n.values[i] < n.values[j]
+}
+
+func (n nmVertexSorter) Swap(i, j int) {
+	n.values[i], n.values[j] = n.values[j], n.values[i]
+	n.vertices[i], n.vertices[j] = n.vertices[j], n.vertices[i]
+}
+
+var _ Method = (*NelderMead)(nil)
+
+// NelderMead is an implementation of the Nelder-Mead simplex algorithm for
+// gradient-free nonlinear optimization (not to be confused with Dantzig's
+// simplex algorithm for linear programming). The implementation follows the
+// algorithm described in
+//
+//	http://epubs.siam.org/doi/pdf/10.1137/S1052623496303470
+//
+// If an initial simplex is provided, it is used and initLoc is ignored. If
+// InitialVertices and InitialValues are both nil, an initial simplex will be
+// generated automatically using the initial location as one vertex, and each
+// additional vertex as SimplexSize away in one dimension.
+//
+// If the simplex update parameters (Reflection, etc.)
+// are zero, they will be set automatically based on the dimension according to +// the recommendations in +// +// http://www.webpages.uidaho.edu/~fuchang/res/ANMS.pdf +type NelderMead struct { + InitialVertices [][]float64 + InitialValues []float64 + Reflection float64 // Reflection parameter (>0) + Expansion float64 // Expansion parameter (>1) + Contraction float64 // Contraction parameter (>0, <1) + Shrink float64 // Shrink parameter (>0, <1) + SimplexSize float64 // size of auto-constructed initial simplex + + status Status + err error + + reflection float64 + expansion float64 + contraction float64 + shrink float64 + + vertices [][]float64 // location of the vertices sorted in ascending f + values []float64 // function values at the vertices sorted in ascending f + centroid []float64 // centroid of all but the worst vertex + + fillIdx int // index for filling the simplex during initialization and shrinking + lastIter nmIterType // Last iteration + reflectedPoint []float64 // Storage of the reflected point location + reflectedValue float64 // Value at the last reflection point +} + +func (n *NelderMead) Status() (Status, error) { + return n.status, n.err +} + +func (*NelderMead) Uses(has Available) (uses Available, err error) { + return has.function() +} + +func (n *NelderMead) Init(dim, tasks int) int { + n.status = NotTerminated + n.err = nil + return 1 +} + +func (n *NelderMead) Run(operation chan<- Task, result <-chan Task, tasks []Task) { + n.status, n.err = localOptimizer{}.run(n, math.NaN(), operation, result, tasks) + close(operation) +} + +func (n *NelderMead) initLocal(loc *Location) (Operation, error) { + dim := len(loc.X) + if cap(n.vertices) < dim+1 { + n.vertices = make([][]float64, dim+1) + } + n.vertices = n.vertices[:dim+1] + for i := range n.vertices { + n.vertices[i] = resize(n.vertices[i], dim) + } + n.values = resize(n.values, dim+1) + n.centroid = resize(n.centroid, dim) + n.reflectedPoint = resize(n.reflectedPoint, dim) + + if n.SimplexSize == 0 { + n.SimplexSize = 0.05 + } + + // Default parameter choices are chosen in a dimension-dependent way + // from http://www.webpages.uidaho.edu/~fuchang/res/ANMS.pdf + n.reflection = n.Reflection + if n.reflection == 0 { + n.reflection = 1 + } + n.expansion = n.Expansion + if n.expansion == 0 { + n.expansion = 1 + 2/float64(dim) + if dim == 1 { + n.expansion = 2 + } + } + n.contraction = n.Contraction + if n.contraction == 0 { + n.contraction = 0.75 - 1/(2*float64(dim)) + if dim == 1 { + n.contraction = 0.5 + } + } + n.shrink = n.Shrink + if n.shrink == 0 { + n.shrink = 1 - 1/float64(dim) + if dim == 1 { + n.shrink = 0.5 + } + } + + if n.InitialVertices != nil { + // Initial simplex provided. Copy the locations and values, and sort them. + if len(n.InitialVertices) != dim+1 { + panic("neldermead: incorrect number of vertices in initial simplex") + } + if len(n.InitialValues) != dim+1 { + panic("neldermead: incorrect number of values in initial simplex") + } + for i := range n.InitialVertices { + if len(n.InitialVertices[i]) != dim { + panic("neldermead: vertex size mismatch") + } + copy(n.vertices[i], n.InitialVertices[i]) + } + copy(n.values, n.InitialValues) + sort.Sort(nmVertexSorter{n.vertices, n.values}) + computeCentroid(n.vertices, n.centroid) + return n.returnNext(nmMajor, loc) + } + + // No simplex provided. Begin initializing initial simplex. First simplex + // entry is the initial location, then step 1 in every direction. 
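+	// For example, in two dimensions with SimplexSize h and initial point
+	// (a, b), the vertices evaluated one at a time below are (a+h, b) and
+	// (a, b+h), while (a, b) itself is kept as the final vertex.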
+ copy(n.vertices[dim], loc.X) + n.values[dim] = loc.F + n.fillIdx = 0 + loc.X[n.fillIdx] += n.SimplexSize + n.lastIter = nmInitialize + return FuncEvaluation, nil +} + +// computeCentroid computes the centroid of all the simplex vertices except the +// final one +func computeCentroid(vertices [][]float64, centroid []float64) { + dim := len(centroid) + for i := range centroid { + centroid[i] = 0 + } + for i := 0; i < dim; i++ { + vertex := vertices[i] + for j, v := range vertex { + centroid[j] += v + } + } + for i := range centroid { + centroid[i] /= float64(dim) + } +} + +func (n *NelderMead) iterateLocal(loc *Location) (Operation, error) { + dim := len(loc.X) + switch n.lastIter { + case nmInitialize: + n.values[n.fillIdx] = loc.F + copy(n.vertices[n.fillIdx], loc.X) + n.fillIdx++ + if n.fillIdx == dim { + // Successfully finished building initial simplex. + sort.Sort(nmVertexSorter{n.vertices, n.values}) + computeCentroid(n.vertices, n.centroid) + return n.returnNext(nmMajor, loc) + } + copy(loc.X, n.vertices[dim]) + loc.X[n.fillIdx] += n.SimplexSize + return FuncEvaluation, nil + case nmMajor: + // Nelder Mead iterations start with Reflection step + return n.returnNext(nmReflected, loc) + case nmReflected: + n.reflectedValue = loc.F + switch { + case loc.F >= n.values[0] && loc.F < n.values[dim-1]: + n.replaceWorst(loc.X, loc.F) + return n.returnNext(nmMajor, loc) + case loc.F < n.values[0]: + return n.returnNext(nmExpanded, loc) + default: + if loc.F < n.values[dim] { + return n.returnNext(nmContractedOutside, loc) + } + return n.returnNext(nmContractedInside, loc) + } + case nmExpanded: + if loc.F < n.reflectedValue { + n.replaceWorst(loc.X, loc.F) + } else { + n.replaceWorst(n.reflectedPoint, n.reflectedValue) + } + return n.returnNext(nmMajor, loc) + case nmContractedOutside: + if loc.F <= n.reflectedValue { + n.replaceWorst(loc.X, loc.F) + return n.returnNext(nmMajor, loc) + } + n.fillIdx = 1 + return n.returnNext(nmShrink, loc) + case nmContractedInside: + if loc.F < n.values[dim] { + n.replaceWorst(loc.X, loc.F) + return n.returnNext(nmMajor, loc) + } + n.fillIdx = 1 + return n.returnNext(nmShrink, loc) + case nmShrink: + copy(n.vertices[n.fillIdx], loc.X) + n.values[n.fillIdx] = loc.F + n.fillIdx++ + if n.fillIdx != dim+1 { + return n.returnNext(nmShrink, loc) + } + sort.Sort(nmVertexSorter{n.vertices, n.values}) + computeCentroid(n.vertices, n.centroid) + return n.returnNext(nmMajor, loc) + default: + panic("unreachable") + } +} + +// returnNext updates the location based on the iteration type and the current +// simplex, and returns the next operation. +func (n *NelderMead) returnNext(iter nmIterType, loc *Location) (Operation, error) { + n.lastIter = iter + switch iter { + case nmMajor: + // Fill loc with the current best point and value, + // and command a convergence check. 
+		copy(loc.X, n.vertices[0])
+		loc.F = n.values[0]
+		return MajorIteration, nil
+	case nmReflected, nmExpanded, nmContractedOutside, nmContractedInside:
+		// x_new = x_centroid + scale * (x_centroid - x_worst)
+		var scale float64
+		switch iter {
+		case nmReflected:
+			scale = n.reflection
+		case nmExpanded:
+			scale = n.reflection * n.expansion
+		case nmContractedOutside:
+			scale = n.reflection * n.contraction
+		case nmContractedInside:
+			scale = -n.contraction
+		}
+		dim := len(loc.X)
+		floats.SubTo(loc.X, n.centroid, n.vertices[dim])
+		floats.Scale(scale, loc.X)
+		floats.Add(loc.X, n.centroid)
+		if iter == nmReflected {
+			copy(n.reflectedPoint, loc.X)
+		}
+		return FuncEvaluation, nil
+	case nmShrink:
+		// x_shrink = x_best + delta * (x_i - x_best)
+		floats.SubTo(loc.X, n.vertices[n.fillIdx], n.vertices[0])
+		floats.Scale(n.shrink, loc.X)
+		floats.Add(loc.X, n.vertices[0])
+		return FuncEvaluation, nil
+	default:
+		panic("unreachable")
+	}
+}
+
+// replaceWorst removes the worst location in the simplex and adds the new
+// {x, f} pair, maintaining the sorted order.
+func (n *NelderMead) replaceWorst(x []float64, f float64) {
+	dim := len(x)
+	if f >= n.values[dim] {
+		panic("increase in simplex value")
+	}
+	copy(n.vertices[dim], x)
+	n.values[dim] = f
+
+	// Sort the newly-added value.
+	for i := dim - 1; i >= 0; i-- {
+		if n.values[i] < f {
+			break
+		}
+		n.vertices[i], n.vertices[i+1] = n.vertices[i+1], n.vertices[i]
+		n.values[i], n.values[i+1] = n.values[i+1], n.values[i]
+	}
+
+	// Update the location of the centroid. Only one point has been replaced, so
+	// subtract the worst point and add the new one.
+	floats.AddScaled(n.centroid, -1/float64(dim), n.vertices[dim])
+	floats.AddScaled(n.centroid, 1/float64(dim), x)
+}
+
+func (*NelderMead) needs() struct {
+	Gradient bool
+	Hessian  bool
+} {
+	return struct {
+		Gradient bool
+		Hessian  bool
+	}{false, false}
+}
diff --git a/vendor/gonum.org/v1/gonum/optimize/newton.go b/vendor/gonum.org/v1/gonum/optimize/newton.go
new file mode 100644
index 0000000000..bd29a08be6
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/optimize/newton.go
@@ -0,0 +1,182 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package optimize
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/mat"
+)
+
+const maxNewtonModifications = 20
+
+var (
+	_ Method          = (*Newton)(nil)
+	_ localMethod     = (*Newton)(nil)
+	_ NextDirectioner = (*Newton)(nil)
+)
+
+// Newton implements a modified Newton's method for Hessian-based unconstrained
+// minimization. It applies regularization when the Hessian is not positive
+// definite, and it can converge to a local minimum from any starting point.
+//
+// Newton iteratively forms a quadratic model to the objective function f and
+// tries to minimize this approximate model. It generates a sequence of
+// locations x_k by means of
+//
+//	solve H_k d_k = -∇f_k for d_k,
+//	x_{k+1} = x_k + α_k d_k,
+//
+// where H_k is the Hessian matrix of f at x_k and α_k is a step size found by
+// a line search.
+//
+// Away from a minimizer H_k may not be positive definite and d_k may not be a
+// descent direction. Newton implements a Hessian modification strategy that
+// adds successively larger multiples of identity to H_k until it becomes
+// positive definite. Note that the repeated trial factorization of the
+// modified Hessian involved in this process can be computationally expensive.
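+//
+// As a concrete illustration, for H = [[0, 1], [1, 0]] the smallest diagonal
+// entry is zero, so on the first iteration tau starts at 0.001 and is grown
+// by the factor Increase (default 5) until H + tau*I admits a Cholesky
+// factorization, which first happens at tau = 3.125.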
+// +// If the Hessian matrix cannot be formed explicitly or if the computational +// cost of its factorization is prohibitive, BFGS or L-BFGS quasi-Newton method +// can be used instead. +type Newton struct { + // Linesearcher is used for selecting suitable steps along the descent + // direction d. Accepted steps should satisfy at least one of the Wolfe, + // Goldstein or Armijo conditions. + // If Linesearcher == nil, an appropriate default is chosen. + Linesearcher Linesearcher + // Increase is the factor by which a scalar tau is successively increased + // so that (H + tau*I) is positive definite. Larger values reduce the + // number of trial Hessian factorizations, but also reduce the second-order + // information in H. + // Increase must be greater than 1. If Increase is 0, it is defaulted to 5. + Increase float64 + // GradStopThreshold sets the threshold for stopping if the gradient norm + // gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and + // if it is NaN the setting is not used. + GradStopThreshold float64 + + status Status + err error + + ls *LinesearchMethod + + hess *mat.SymDense // Storage for a copy of the Hessian matrix. + chol mat.Cholesky // Storage for the Cholesky factorization. + tau float64 +} + +func (n *Newton) Status() (Status, error) { + return n.status, n.err +} + +func (*Newton) Uses(has Available) (uses Available, err error) { + return has.hessian() +} + +func (n *Newton) Init(dim, tasks int) int { + n.status = NotTerminated + n.err = nil + return 1 +} + +func (n *Newton) Run(operation chan<- Task, result <-chan Task, tasks []Task) { + n.status, n.err = localOptimizer{}.run(n, n.GradStopThreshold, operation, result, tasks) + close(operation) +} + +func (n *Newton) initLocal(loc *Location) (Operation, error) { + if n.Increase == 0 { + n.Increase = 5 + } + if n.Increase <= 1 { + panic("optimize: Newton.Increase must be greater than 1") + } + if n.Linesearcher == nil { + n.Linesearcher = &Bisection{} + } + if n.ls == nil { + n.ls = &LinesearchMethod{} + } + n.ls.Linesearcher = n.Linesearcher + n.ls.NextDirectioner = n + return n.ls.Init(loc) +} + +func (n *Newton) iterateLocal(loc *Location) (Operation, error) { + return n.ls.Iterate(loc) +} + +func (n *Newton) InitDirection(loc *Location, dir []float64) (stepSize float64) { + dim := len(loc.X) + n.hess = resizeSymDense(n.hess, dim) + n.tau = 0 + return n.NextDirection(loc, dir) +} + +func (n *Newton) NextDirection(loc *Location, dir []float64) (stepSize float64) { + // This method implements Algorithm 3.3 (Cholesky with Added Multiple of + // the Identity) from Nocedal, Wright (2006), 2nd edition. + + dim := len(loc.X) + d := mat.NewVecDense(dim, dir) + grad := mat.NewVecDense(dim, loc.Gradient) + n.hess.CopySym(loc.Hessian) + + // Find the smallest diagonal entry of the Hessian. + minA := n.hess.At(0, 0) + for i := 1; i < dim; i++ { + a := n.hess.At(i, i) + if a < minA { + minA = a + } + } + // If the smallest diagonal entry is positive, the Hessian may be positive + // definite, and so first attempt to apply the Cholesky factorization to + // the un-modified Hessian. If the smallest entry is negative, use the + // final tau from the last iteration if regularization was needed, + // otherwise guess an appropriate value for tau. + if minA > 0 { + n.tau = 0 + } else if n.tau == 0 { + n.tau = -minA + 0.001 + } + + for k := 0; k < maxNewtonModifications; k++ { + if n.tau != 0 { + // Add a multiple of identity to the Hessian. 
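+			// The diagonal is rebuilt from the original Hessian on each
+			// trial, so successive values of tau replace, rather than
+			// accumulate on, the previous shift.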
+ for i := 0; i < dim; i++ { + n.hess.SetSym(i, i, loc.Hessian.At(i, i)+n.tau) + } + } + // Try to apply the Cholesky factorization. + pd := n.chol.Factorize(n.hess) + if pd { + // Store the solution in d's backing array, dir. + err := n.chol.SolveVecTo(d, grad) + if err == nil { + d.ScaleVec(-1, d) + return 1 + } + } + // Modified Hessian is not PD, so increase tau. + n.tau = math.Max(n.Increase*n.tau, 0.001) + } + + // Hessian modification failed to get a PD matrix. Return the negative + // gradient as the descent direction. + d.ScaleVec(-1, grad) + return 1 +} + +func (n *Newton) needs() struct { + Gradient bool + Hessian bool +} { + return struct { + Gradient bool + Hessian bool + }{true, true} +} diff --git a/vendor/gonum.org/v1/gonum/optimize/printer.go b/vendor/gonum.org/v1/gonum/optimize/printer.go new file mode 100644 index 0000000000..c4cc77bcbd --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/printer.go @@ -0,0 +1,108 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import ( + "fmt" + "io" + "math" + "os" + "time" + + "gonum.org/v1/gonum/floats" +) + +var printerHeadings = [...]string{ + "Iter", + "Runtime", + "FuncEvals", + "Func", + "GradEvals", + "|Gradient|∞", + "HessEvals", +} + +const ( + printerBaseTmpl = "%9v %16v %9v %22v" // Base template for headings and values that are always printed. + printerGradTmpl = " %9v %22v" // Appended to base template when loc.Gradient != nil. + printerHessTmpl = " %9v" // Appended to base template when loc.Hessian != nil. +) + +var _ Recorder = (*Printer)(nil) + +// Printer writes column-format output to the specified writer as the optimization +// progresses. By default, it writes to os.Stdout. +type Printer struct { + Writer io.Writer + HeadingInterval int + ValueInterval time.Duration + + lastHeading int + lastValue time.Time +} + +func NewPrinter() *Printer { + return &Printer{ + Writer: os.Stdout, + HeadingInterval: 30, + ValueInterval: 500 * time.Millisecond, + } +} + +func (p *Printer) Init() error { + p.lastHeading = p.HeadingInterval // So the headings are printed the first time. + p.lastValue = time.Now().Add(-p.ValueInterval) // So the values are printed the first time. + return nil +} + +func (p *Printer) Record(loc *Location, op Operation, stats *Stats) error { + if op != MajorIteration && op != InitIteration && op != PostIteration { + return nil + } + + // Print values always on PostIteration or when ValueInterval has elapsed. + printValues := time.Since(p.lastValue) > p.ValueInterval || op == PostIteration + if !printValues { + // Return early if not printing anything. + return nil + } + + // Print heading when HeadingInterval lines have been printed, but never on PostIteration. 
+ printHeading := p.lastHeading >= p.HeadingInterval && op != PostIteration + if printHeading { + p.lastHeading = 1 + } else { + p.lastHeading++ + } + + if printHeading { + headings := "\n" + fmt.Sprintf(printerBaseTmpl, printerHeadings[0], printerHeadings[1], printerHeadings[2], printerHeadings[3]) + if loc.Gradient != nil { + headings += fmt.Sprintf(printerGradTmpl, printerHeadings[4], printerHeadings[5]) + } + if loc.Hessian != nil { + headings += fmt.Sprintf(printerHessTmpl, printerHeadings[6]) + } + _, err := fmt.Fprintln(p.Writer, headings) + if err != nil { + return err + } + } + + values := fmt.Sprintf(printerBaseTmpl, stats.MajorIterations, stats.Runtime, stats.FuncEvaluations, loc.F) + if loc.Gradient != nil { + values += fmt.Sprintf(printerGradTmpl, stats.GradEvaluations, floats.Norm(loc.Gradient, math.Inf(1))) + } + if loc.Hessian != nil { + values += fmt.Sprintf(printerHessTmpl, stats.HessEvaluations) + } + _, err := fmt.Fprintln(p.Writer, values) + if err != nil { + return err + } + + p.lastValue = time.Now() + return nil +} diff --git a/vendor/gonum.org/v1/gonum/optimize/stepsizers.go b/vendor/gonum.org/v1/gonum/optimize/stepsizers.go new file mode 100644 index 0000000000..6508b573e9 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/stepsizers.go @@ -0,0 +1,194 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import ( + "math" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/floats/scalar" +) + +const ( + initialStepFactor = 1 + + quadraticMinimumStepSize = 1e-3 + quadraticMaximumStepSize = 1 + quadraticThreshold = 1e-12 + + firstOrderMinimumStepSize = quadraticMinimumStepSize + firstOrderMaximumStepSize = quadraticMaximumStepSize +) + +var ( + _ StepSizer = ConstantStepSize{} + _ StepSizer = (*QuadraticStepSize)(nil) + _ StepSizer = (*FirstOrderStepSize)(nil) +) + +// ConstantStepSize is a StepSizer that returns the same step size for +// every iteration. +type ConstantStepSize struct { + Size float64 +} + +func (c ConstantStepSize) Init(_ *Location, _ []float64) float64 { + return c.Size +} + +func (c ConstantStepSize) StepSize(_ *Location, _ []float64) float64 { + return c.Size +} + +// QuadraticStepSize estimates the initial line search step size as the minimum +// of a quadratic that interpolates f(x_{k-1}), f(x_k) and ∇f_k⋅p_k. +// This is useful for line search methods that do not produce well-scaled +// descent directions, such as gradient descent or conjugate gradient methods. +// The step size is bounded away from zero. +type QuadraticStepSize struct { + // Threshold determines that the initial step size should be estimated by + // quadratic interpolation when the relative change in the objective + // function is larger than Threshold. Otherwise the initial step size is + // set to 2*previous step size. + // If Threshold is zero, it will be set to 1e-12. + Threshold float64 + // InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞. + // If InitialStepFactor is zero, it will be set to one. + InitialStepFactor float64 + // MinStepSize is the lower bound on the estimated step size. + // MinStepSize times GradientAbsTol should always be greater than machine epsilon. + // If MinStepSize is zero, it will be set to 1e-3. + MinStepSize float64 + // MaxStepSize is the upper bound on the estimated step size. + // If MaxStepSize is zero, it will be set to 1. 
+ MaxStepSize float64 + + fPrev float64 + dirPrevNorm float64 + projGradPrev float64 + xPrev []float64 +} + +func (q *QuadraticStepSize) Init(loc *Location, dir []float64) (stepSize float64) { + if q.Threshold == 0 { + q.Threshold = quadraticThreshold + } + if q.InitialStepFactor == 0 { + q.InitialStepFactor = initialStepFactor + } + if q.MinStepSize == 0 { + q.MinStepSize = quadraticMinimumStepSize + } + if q.MaxStepSize == 0 { + q.MaxStepSize = quadraticMaximumStepSize + } + if q.MaxStepSize <= q.MinStepSize { + panic("optimize: MinStepSize not smaller than MaxStepSize") + } + + gNorm := floats.Norm(loc.Gradient, math.Inf(1)) + stepSize = math.Max(q.MinStepSize, math.Min(q.InitialStepFactor/gNorm, q.MaxStepSize)) + + q.fPrev = loc.F + q.dirPrevNorm = floats.Norm(dir, 2) + q.projGradPrev = floats.Dot(loc.Gradient, dir) + q.xPrev = resize(q.xPrev, len(loc.X)) + copy(q.xPrev, loc.X) + return stepSize +} + +func (q *QuadraticStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) { + stepSizePrev := floats.Distance(loc.X, q.xPrev, 2) / q.dirPrevNorm + projGrad := floats.Dot(loc.Gradient, dir) + + stepSize = 2 * stepSizePrev + if !scalar.EqualWithinRel(q.fPrev, loc.F, q.Threshold) { + // Two consecutive function values are not relatively equal, so + // computing the minimum of a quadratic interpolant might make sense + + df := (loc.F - q.fPrev) / stepSizePrev + quadTest := df - q.projGradPrev + if quadTest > 0 { + // There is a chance of approximating the function well by a + // quadratic only if the finite difference (f_k-f_{k-1})/stepSizePrev + // is larger than ∇f_{k-1}⋅p_{k-1} + + // Set the step size to the minimizer of the quadratic function that + // interpolates f_{k-1}, ∇f_{k-1}⋅p_{k-1} and f_k + stepSize = -q.projGradPrev * stepSizePrev / quadTest / 2 + } + } + // Bound the step size to lie in [MinStepSize, MaxStepSize] + stepSize = math.Max(q.MinStepSize, math.Min(stepSize, q.MaxStepSize)) + + q.fPrev = loc.F + q.dirPrevNorm = floats.Norm(dir, 2) + q.projGradPrev = projGrad + copy(q.xPrev, loc.X) + return stepSize +} + +// FirstOrderStepSize estimates the initial line search step size based on the +// assumption that the first-order change in the function will be the same as +// that obtained at the previous iteration. That is, the initial step size s^0_k +// is chosen so that +// +// s^0_k ∇f_k⋅p_k = s_{k-1} ∇f_{k-1}⋅p_{k-1} +// +// This is useful for line search methods that do not produce well-scaled +// descent directions, such as gradient descent or conjugate gradient methods. +type FirstOrderStepSize struct { + // InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞. + // If InitialStepFactor is zero, it will be set to one. + InitialStepFactor float64 + // MinStepSize is the lower bound on the estimated step size. + // MinStepSize times GradientAbsTol should always be greater than machine epsilon. + // If MinStepSize is zero, it will be set to 1e-3. + MinStepSize float64 + // MaxStepSize is the upper bound on the estimated step size. + // If MaxStepSize is zero, it will be set to 1. 
+ MaxStepSize float64 + + dirPrevNorm float64 + projGradPrev float64 + xPrev []float64 +} + +func (fo *FirstOrderStepSize) Init(loc *Location, dir []float64) (stepSize float64) { + if fo.InitialStepFactor == 0 { + fo.InitialStepFactor = initialStepFactor + } + if fo.MinStepSize == 0 { + fo.MinStepSize = firstOrderMinimumStepSize + } + if fo.MaxStepSize == 0 { + fo.MaxStepSize = firstOrderMaximumStepSize + } + if fo.MaxStepSize <= fo.MinStepSize { + panic("optimize: MinStepSize not smaller than MaxStepSize") + } + + gNorm := floats.Norm(loc.Gradient, math.Inf(1)) + stepSize = math.Max(fo.MinStepSize, math.Min(fo.InitialStepFactor/gNorm, fo.MaxStepSize)) + + fo.dirPrevNorm = floats.Norm(dir, 2) + fo.projGradPrev = floats.Dot(loc.Gradient, dir) + fo.xPrev = resize(fo.xPrev, len(loc.X)) + copy(fo.xPrev, loc.X) + return stepSize +} + +func (fo *FirstOrderStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) { + stepSizePrev := floats.Distance(loc.X, fo.xPrev, 2) / fo.dirPrevNorm + projGrad := floats.Dot(loc.Gradient, dir) + + stepSize = stepSizePrev * fo.projGradPrev / projGrad + stepSize = math.Max(fo.MinStepSize, math.Min(stepSize, fo.MaxStepSize)) + + fo.dirPrevNorm = floats.Norm(dir, 2) + fo.projGradPrev = floats.Dot(loc.Gradient, dir) + copy(fo.xPrev, loc.X) + return stepSize +} diff --git a/vendor/gonum.org/v1/gonum/optimize/termination.go b/vendor/gonum.org/v1/gonum/optimize/termination.go new file mode 100644 index 0000000000..df4bdb7e49 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/termination.go @@ -0,0 +1,123 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import "errors" + +// Status represents the status of the optimization. Programs +// should not rely on the underlying numeric value of the Status being constant. +type Status int + +const ( + NotTerminated Status = iota + Success + FunctionThreshold + FunctionConvergence + GradientThreshold + StepConvergence + FunctionNegativeInfinity + MethodConverge + Failure + IterationLimit + RuntimeLimit + FunctionEvaluationLimit + GradientEvaluationLimit + HessianEvaluationLimit +) + +func (s Status) String() string { + return statuses[s].name +} + +// Early returns true if the status indicates the optimization ended before a +// minimum was found. As an example, if the maximum iterations was reached, a +// minimum was not found, but if the gradient norm was reached then a minimum +// was found. +func (s Status) Early() bool { + return statuses[s].early +} + +// Err returns the error associated with an early ending to the minimization. If +// Early returns false, Err will return nil. 
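+//
+// A typical use (an illustrative sketch; p and x0 are assumed to be defined
+// by the caller) is
+//
+//	res, err := Minimize(p, x0, nil, nil)
+//	if err == nil && res.Status.Early() {
+//		err = res.Status.Err()
+//	}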
+func (s Status) Err() error { + return statuses[s].err +} + +var statuses = []struct { + name string + early bool + err error +}{ + { + name: "NotTerminated", + }, + { + name: "Success", + }, + { + name: "FunctionThreshold", + }, + { + name: "FunctionConvergence", + }, + { + name: "GradientThreshold", + }, + { + name: "StepConvergence", + }, + { + name: "FunctionNegativeInfinity", + }, + { + name: "MethodConverge", + }, + { + name: "Failure", + early: true, + err: errors.New("optimize: termination ended in failure"), + }, + { + name: "IterationLimit", + early: true, + err: errors.New("optimize: maximum number of major iterations reached"), + }, + { + name: "RuntimeLimit", + early: true, + err: errors.New("optimize: maximum runtime reached"), + }, + { + name: "FunctionEvaluationLimit", + early: true, + err: errors.New("optimize: maximum number of function evaluations reached"), + }, + { + name: "GradientEvaluationLimit", + early: true, + err: errors.New("optimize: maximum number of gradient evaluations reached"), + }, + { + name: "HessianEvaluationLimit", + early: true, + err: errors.New("optimize: maximum number of Hessian evaluations reached"), + }, +} + +// NewStatus returns a unique Status variable to represent a custom status. +// NewStatus is intended to be called only during package initialization, and +// calls to NewStatus are not thread safe. +// +// NewStatus takes in three arguments, the string that should be output from +// Status.String, a boolean if the status indicates early optimization conclusion, +// and the error to return from Err (if any). +func NewStatus(name string, early bool, err error) Status { + statuses = append(statuses, struct { + name string + early bool + err error + }{name, early, err}) + return Status(len(statuses) - 1) +} diff --git a/vendor/gonum.org/v1/gonum/optimize/types.go b/vendor/gonum.org/v1/gonum/optimize/types.go new file mode 100644 index 0000000000..e3172c1d70 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/optimize/types.go @@ -0,0 +1,273 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package optimize + +import ( + "fmt" + "time" + + "gonum.org/v1/gonum/mat" +) + +const defaultGradientAbsTol = 1e-12 + +// Operation represents the set of operations commanded by Method at each +// iteration. It is a bitmap of various Iteration and Evaluation constants. +// Individual constants must NOT be combined together by the binary OR operator +// except for the Evaluation operations. +type Operation uint64 + +// Supported Operations. +const ( + // NoOperation specifies that no evaluation or convergence check should + // take place. + NoOperation Operation = 0 + // InitIteration is sent to Recorder to indicate the initial location. + // All fields of the location to record must be valid. + // Method must not return it. + InitIteration Operation = 1 << (iota - 1) + // PostIteration is sent to Recorder to indicate the final location + // reached during an optimization run. + // All fields of the location to record must be valid. + // Method must not return it. + PostIteration + // MajorIteration indicates that the next candidate location for + // an optimum has been found and convergence should be checked. + MajorIteration + // MethodDone declares that the method is done running. A method must + // be a Statuser in order to use this iteration, and after returning + // MethodDone, the Status must return other than NotTerminated. 
+ MethodDone + // FuncEvaluation specifies that the objective function + // should be evaluated. + FuncEvaluation + // GradEvaluation specifies that the gradient + // of the objective function should be evaluated. + GradEvaluation + // HessEvaluation specifies that the Hessian + // of the objective function should be evaluated. + HessEvaluation + // signalDone is used internally to signal completion. + signalDone + + // Mask for the evaluating operations. + evalMask = FuncEvaluation | GradEvaluation | HessEvaluation +) + +func (op Operation) isEvaluation() bool { + return op&evalMask != 0 && op&^evalMask == 0 +} + +func (op Operation) String() string { + if op&evalMask != 0 { + return fmt.Sprintf("Evaluation(Func: %t, Grad: %t, Hess: %t, Extra: 0b%b)", + op&FuncEvaluation != 0, + op&GradEvaluation != 0, + op&HessEvaluation != 0, + op&^(evalMask)) + } + s, ok := operationNames[op] + if ok { + return s + } + return fmt.Sprintf("Operation(%d)", op) +} + +var operationNames = map[Operation]string{ + NoOperation: "NoOperation", + InitIteration: "InitIteration", + MajorIteration: "MajorIteration", + PostIteration: "PostIteration", + MethodDone: "MethodDone", + signalDone: "signalDone", +} + +// Result represents the answer of an optimization run. It contains the optimum +// function value, X location, and gradient as well as the Status at convergence +// and Statistics taken during the run. +type Result struct { + Location + Stats + Status Status +} + +// Stats contains the statistics of the run. +type Stats struct { + MajorIterations int // Total number of major iterations + FuncEvaluations int // Number of evaluations of Func + GradEvaluations int // Number of evaluations of Grad + HessEvaluations int // Number of evaluations of Hess + Runtime time.Duration // Total runtime of the optimization +} + +// complementEval returns an evaluating operation that evaluates fields of loc +// not evaluated by eval. +func complementEval(loc *Location, eval Operation) (complEval Operation) { + if eval&FuncEvaluation == 0 { + complEval = FuncEvaluation + } + if loc.Gradient != nil && eval&GradEvaluation == 0 { + complEval |= GradEvaluation + } + if loc.Hessian != nil && eval&HessEvaluation == 0 { + complEval |= HessEvaluation + } + return complEval +} + +// Problem describes the optimization problem to be solved. +type Problem struct { + // Func evaluates the objective function at the given location. Func + // must not modify x. + Func func(x []float64) float64 + + // Grad evaluates the gradient at x and stores the result in grad which will + // be the same length as x. Grad must not modify x. + Grad func(grad, x []float64) + + // Hess evaluates the Hessian at x and stores the result in-place in hess which + // will have dimensions matching the length of x. Hess must not modify x. + Hess func(hess *mat.SymDense, x []float64) + + // Status reports the status of the objective function being optimized and any + // error. This can be used to terminate early, for example when the function is + // not able to evaluate itself. The user can use one of the pre-provided Status + // constants, or may call NewStatus to create a custom Status value. + Status func() (Status, error) +} + +// Available describes the functions available to call in Problem. 
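+//
+// For example (editor's sketch), a Problem with Func and Grad set yields
+// Available{Grad: true} via availFromProblem:
+//
+//	p := Problem{
+//		Func: func(x []float64) float64 { return x[0] * x[0] },
+//		Grad: func(grad, x []float64) { grad[0] = 2 * x[0] },
+//	}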
+type Available struct { + Grad bool + Hess bool +} + +func availFromProblem(prob Problem) Available { + return Available{Grad: prob.Grad != nil, Hess: prob.Hess != nil} +} + +// function tests if the Problem described by the receiver is suitable for an +// unconstrained Method that only calls the function, and returns the result. +func (has Available) function() (uses Available, err error) { + // TODO(btracey): This needs to be modified when optimize supports + // constrained optimization. + return Available{}, nil +} + +// gradient tests if the Problem described by the receiver is suitable for an +// unconstrained gradient-based Method, and returns the result. +func (has Available) gradient() (uses Available, err error) { + // TODO(btracey): This needs to be modified when optimize supports + // constrained optimization. + if !has.Grad { + return Available{}, ErrMissingGrad + } + return Available{Grad: true}, nil +} + +// hessian tests if the Problem described by the receiver is suitable for an +// unconstrained Hessian-based Method, and returns the result. +func (has Available) hessian() (uses Available, err error) { + // TODO(btracey): This needs to be modified when optimize supports + // constrained optimization. + if !has.Grad { + return Available{}, ErrMissingGrad + } + if !has.Hess { + return Available{}, ErrMissingHess + } + return Available{Grad: true, Hess: true}, nil +} + +// Settings represents settings of the optimization run. It contains initial +// settings, convergence information, and Recorder information. Convergence +// settings are only checked at MajorIterations, while Evaluation thresholds +// are checked at every Operation. See the field comments for default values. +type Settings struct { + // InitValues specifies properties (function value, gradient, etc.) known + // at the initial location passed to Minimize. If InitValues is non-nil, then + // the function value F must be provided, the location X must not be specified + // and other fields may be specified. The values in Location may be modified + // during the call to Minimize. + InitValues *Location + + // GradientThreshold stops optimization with GradientThreshold status if the + // infinity norm of the gradient is less than this value. This defaults to + // a value of 0 (and so gradient convergence is not checked), however note + // that many Methods (LBFGS, CG, etc.) will converge with a small value of + // the gradient, and so to fully disable this setting the Method may need to + // be modified. + // This setting has no effect if the gradient is not used by the Method. + GradientThreshold float64 + + // Converger checks if the optimization has converged based on the (history + // of) locations found during the optimization. Minimize will pass the + // Location at every MajorIteration to the Converger. + // + // If the Converger is nil, a default value of + // FunctionConverge { + // Absolute: 1e-10, + // Iterations: 100, + // } + // will be used. NeverTerminated can be used to always return a + // NotTerminated status. + Converger Converger + + // MajorIterations is the maximum number of iterations allowed. + // IterationLimit status is returned if the number of major iterations + // equals or exceeds this value. + // If it equals zero, this setting has no effect. + // The default value is 0. + MajorIterations int + + // Runtime is the maximum runtime allowed. RuntimeLimit status is returned + // if the duration of the run is longer than this value. 
Runtime is only + // checked at MajorIterations of the Method. + // If it equals zero, this setting has no effect. + // The default value is 0. + Runtime time.Duration + + // FuncEvaluations is the maximum allowed number of function evaluations. + // FunctionEvaluationLimit status is returned if the total number of calls + // to Func equals or exceeds this number. + // If it equals zero, this setting has no effect. + // The default value is 0. + FuncEvaluations int + + // GradEvaluations is the maximum allowed number of gradient evaluations. + // GradientEvaluationLimit status is returned if the total number of calls + // to Grad equals or exceeds this number. + // If it equals zero, this setting has no effect. + // The default value is 0. + GradEvaluations int + + // HessEvaluations is the maximum allowed number of Hessian evaluations. + // HessianEvaluationLimit status is returned if the total number of calls + // to Hess equals or exceeds this number. + // If it equals zero, this setting has no effect. + // The default value is 0. + HessEvaluations int + + Recorder Recorder + + // Concurrent represents how many concurrent evaluations are possible. + Concurrent int +} + +// resize takes x and returns a slice of length dim. It returns a resliced x +// if cap(x) >= dim, and a new slice otherwise. +func resize(x []float64, dim int) []float64 { + if dim > cap(x) { + return make([]float64, dim) + } + return x[:dim] +} + +func resizeSymDense(m *mat.SymDense, dim int) *mat.SymDense { + if m == nil || cap(m.RawSymmetric().Data) < dim*dim { + return mat.NewSymDense(dim, nil) + } + return mat.NewSymDense(dim, m.RawSymmetric().Data[:dim*dim]) +} diff --git a/vendor/gonum.org/v1/gonum/spatial/r1/doc.go b/vendor/gonum.org/v1/gonum/spatial/r1/doc.go new file mode 100644 index 0000000000..0215961190 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/spatial/r1/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package r1 provides 1D vectors and intervals and operations on them. +package r1 // import "gonum.org/v1/gonum/spatial/r1" diff --git a/vendor/gonum.org/v1/gonum/spatial/r1/interval.go b/vendor/gonum.org/v1/gonum/spatial/r1/interval.go new file mode 100644 index 0000000000..71f42fd3f7 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/spatial/r1/interval.go @@ -0,0 +1,10 @@ +// Copyright ©2019 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package r1 + +// Interval represents an interval. +type Interval struct { + Min, Max float64 +} diff --git a/vendor/gonum.org/v1/gonum/stat/README.md b/vendor/gonum.org/v1/gonum/stat/README.md new file mode 100644 index 0000000000..7156dc5095 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/README.md @@ -0,0 +1,6 @@ +# Gonum stat + +[![go.dev reference](https://pkg.go.dev/badge/gonum.org/v1/gonum/stat)](https://pkg.go.dev/gonum.org/v1/gonum/stat) +[![GoDoc](https://godocs.io/gonum.org/v1/gonum/stat?status.svg)](https://godocs.io/gonum.org/v1/gonum/stat) + +Package stat is a statistics package for the Go language. diff --git a/vendor/gonum.org/v1/gonum/stat/combin/combin.go b/vendor/gonum.org/v1/gonum/stat/combin/combin.go new file mode 100644 index 0000000000..b5814171cc --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/combin/combin.go @@ -0,0 +1,683 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package combin + +import ( + "math" + "sort" +) + +const ( + errNegInput = "combin: negative input" + badSetSize = "combin: n < k" + badInput = "combin: wrong input slice length" + errNonpositiveDimension = "combin: non-positive dimension" +) + +// Binomial returns the binomial coefficient of (n,k), also commonly referred to +// as "n choose k". +// +// The binomial coefficient, C(n,k), is the number of unordered combinations of +// k elements in a set that is n elements big, and is defined as +// +// C(n,k) = n!/((n-k)!k!) +// +// n and k must be non-negative with n >= k, otherwise Binomial will panic. +// No check is made for overflow. +func Binomial(n, k int) int { + if n < 0 || k < 0 { + panic(errNegInput) + } + if n < k { + panic(badSetSize) + } + // (n,k) = (n, n-k) + if k > n/2 { + k = n - k + } + b := 1 + for i := 1; i <= k; i++ { + b = (n - k + i) * b / i + } + return b +} + +// GeneralizedBinomial returns the generalized binomial coefficient of (n, k), +// defined as +// +// Γ(n+1) / (Γ(k+1) Γ(n-k+1)) +// +// where Γ is the Gamma function. GeneralizedBinomial is useful for continuous +// relaxations of the binomial coefficient, or when the binomial coefficient value +// may overflow int. In the latter case, one may use math/big for an exact +// computation. +// +// n and k must be non-negative with n >= k, otherwise GeneralizedBinomial will panic. +func GeneralizedBinomial(n, k float64) float64 { + return math.Exp(LogGeneralizedBinomial(n, k)) +} + +// LogGeneralizedBinomial returns the log of the generalized binomial coefficient. +// See GeneralizedBinomial for more information. +func LogGeneralizedBinomial(n, k float64) float64 { + if n < 0 || k < 0 { + panic(errNegInput) + } + if n < k { + panic(badSetSize) + } + a, _ := math.Lgamma(n + 1) + b, _ := math.Lgamma(k + 1) + c, _ := math.Lgamma(n - k + 1) + return a - b - c +} + +// CombinationGenerator generates combinations iteratively. The Combinations +// function may be called to generate all combinations collectively. +type CombinationGenerator struct { + n int + k int + previous []int + remaining int +} + +// NewCombinationGenerator returns a CombinationGenerator for generating the +// combinations of k elements from a set of size n. +// +// n and k must be non-negative with n >= k, otherwise NewCombinationGenerator +// will panic. +func NewCombinationGenerator(n, k int) *CombinationGenerator { + return &CombinationGenerator{ + n: n, + k: k, + remaining: Binomial(n, k), + } +} + +// Next advances the iterator if there are combinations remaining to be generated, +// and returns false if all combinations have been generated. Next must be called +// to initialize the first value before calling Combination or Combination will +// panic. The value returned by Combination is only changed during calls to Next. +func (c *CombinationGenerator) Next() bool { + if c.remaining <= 0 { + // Next is called before combination, so c.remaining is set to zero before + // Combination is called. Thus, Combination cannot panic on zero, and a + // second sentinel value is needed. + c.remaining = -1 + return false + } + if c.previous == nil { + c.previous = make([]int, c.k) + for i := range c.previous { + c.previous[i] = i + } + } else { + nextCombination(c.previous, c.n, c.k) + } + c.remaining-- + return true +} + +// Combination returns the current combination. 
If dst is non-nil, it must have
+// length k and the result will be stored in-place into dst. If dst
+// is nil a new slice will be allocated and returned. If all of the combinations
+// have already been constructed (Next() returns false), Combination will panic.
+//
+// Next must be called to initialize the first value before calling Combination
+// or Combination will panic. The value returned by Combination is only changed
+// during calls to Next.
+func (c *CombinationGenerator) Combination(dst []int) []int {
+	if c.remaining == -1 {
+		panic("combin: all combinations have been generated")
+	}
+	if c.previous == nil {
+		panic("combin: Combination called before Next")
+	}
+	if dst == nil {
+		dst = make([]int, c.k)
+	} else if len(dst) != c.k {
+		panic(badInput)
+	}
+	copy(dst, c.previous)
+	return dst
+}
+
+// Combinations generates all of the combinations of k elements from a
+// set of size n. The returned slice has length Binomial(n,k) and each inner slice
+// has length k.
+//
+// n and k must be non-negative with n >= k, otherwise Combinations will panic.
+//
+// CombinationGenerator may alternatively be used to generate the combinations
+// iteratively instead of collectively, or IndexToCombination for random access.
+func Combinations(n, k int) [][]int {
+	combins := Binomial(n, k)
+	data := make([][]int, combins)
+	if len(data) == 0 {
+		return data
+	}
+	data[0] = make([]int, k)
+	for i := range data[0] {
+		data[0][i] = i
+	}
+	for i := 1; i < combins; i++ {
+		next := make([]int, k)
+		copy(next, data[i-1])
+		nextCombination(next, n, k)
+		data[i] = next
+	}
+	return data
+}
+
+// nextCombination generates the combination after s, overwriting the input value.
+func nextCombination(s []int, n, k int) {
+	for j := k - 1; j >= 0; j-- {
+		if s[j] == n+j-k {
+			continue
+		}
+		s[j]++
+		for l := j + 1; l < k; l++ {
+			s[l] = s[j] + l - j
+		}
+		break
+	}
+}
+
+// CombinationIndex returns the index of the given combination.
+//
+// The functions CombinationIndex and IndexToCombination define a bijection
+// between the integers and the Binomial(n, k) number of possible combinations.
+// CombinationIndex returns the inverse of IndexToCombination.
+//
+// CombinationIndex panics if comb is not a sorted combination of the first
+// [0,n) integers, if n or k are negative, or if k is greater than n.
+func CombinationIndex(comb []int, n, k int) int {
+	if n < 0 || k < 0 {
+		panic(errNegInput)
+	}
+	if n < k {
+		panic(badSetSize)
+	}
+	if len(comb) != k {
+		panic("combin: bad length combination")
+	}
+	if !sort.IntsAreSorted(comb) {
+		panic("combin: input combination is not sorted")
+	}
+	contains := make(map[int]struct{}, k)
+	for _, v := range comb {
+		contains[v] = struct{}{}
+	}
+	if len(contains) != k {
+		panic("combin: comb contains non-unique elements")
+	}
+	// This algorithm iterates in reverse lexicographic order.
+	// Flip the index and values to swap the order.
+	rev := make([]int, k)
+	for i, v := range comb {
+		rev[len(comb)-i-1] = n - v - 1
+	}
+	idx := 0
+	for i, v := range rev {
+		if v >= i+1 {
+			idx += Binomial(v, i+1)
+		}
+	}
+	return Binomial(n, k) - 1 - idx
+}
+
+// IndexToCombination returns the combination corresponding to the given index.
+//
+// The functions CombinationIndex and IndexToCombination define a bijection
+// between the integers and the Binomial(n, k) number of possible combinations.
+// IndexToCombination returns the inverse of CombinationIndex (up to the order
+// of the elements).
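+//
+// For example (editor's illustration), with n = 4 and k = 2 the bijection
+// round-trips as
+//
+//	comb := IndexToCombination(nil, 3, 4, 2) // comb == []int{1, 2}
+//	idx := CombinationIndex(comb, 4, 2)      // idx == 3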
+//
+// The combination is stored in-place into dst if dst is non-nil, otherwise
+// a new slice is allocated and returned.
+//
+// IndexToCombination panics if n or k are negative, if k is greater than n,
+// or if idx is not in [0, Binomial(n,k)-1]. IndexToCombination will also panic
+// if dst is non-nil and len(dst) is not k.
+func IndexToCombination(dst []int, idx, n, k int) []int {
+	if idx < 0 || idx >= Binomial(n, k) {
+		panic("combin: invalid index")
+	}
+	if dst == nil {
+		dst = make([]int, k)
+	} else if len(dst) != k {
+		panic(badInput)
+	}
+	// The base algorithm indexes in reverse lexicographic order;
+	// flip the values and the index.
+	idx = Binomial(n, k) - 1 - idx
+	for i := range dst {
+		// Find the largest number m such that Binomial(m, k-i) <= idx.
+		// This is one less than the first number such that it is larger.
+		m := sort.Search(n, func(m int) bool {
+			if m < k-i {
+				return false
+			}
+			return Binomial(m, k-i) > idx
+		})
+		m--
+		// Normally this would put m into the last free spot, but we
+		// reverse the index and the value.
+		dst[i] = n - m - 1
+		if m >= k-i {
+			idx -= Binomial(m, k-i)
+		}
+	}
+	return dst
+}
+
+// Cartesian returns the Cartesian product of the slices in data. The Cartesian
+// product of two sets is the set of all combinations of the items. For example,
+// given the input
+//
+//	[]int{2, 3, 1}
+//
+// the returned matrix will be
+//
+//	[ 0 0 0 ]
+//	[ 0 1 0 ]
+//	[ 0 2 0 ]
+//	[ 1 0 0 ]
+//	[ 1 1 0 ]
+//	[ 1 2 0 ]
+//
+// Cartesian panics if lens is empty or if any of the provided lengths are less than 1.
+func Cartesian(lens []int) [][]int {
+	rows := Card(lens)
+	if rows == 0 {
+		panic("combin: empty lengths")
+	}
+	out := make([][]int, rows)
+	for i := 0; i < rows; i++ {
+		out[i] = SubFor(nil, i, lens)
+	}
+	return out
+}
+
+// Card computes the cardinality of the multi-dimensional space whose
+// dimensions have size specified by dims. All lengths must be non-negative,
+// otherwise Card will panic.
+func Card(dims []int) int {
+	if len(dims) == 0 {
+		return 0
+	}
+	card := 1
+	for _, v := range dims {
+		if v < 0 {
+			panic("combin: length less than zero")
+		}
+		card *= v
+	}
+	return card
+}
+
+// NewCartesianGenerator returns a CartesianGenerator for iterating over Cartesian products which are generated on the fly.
+// All values in lens must be non-negative, otherwise this will panic.
+func NewCartesianGenerator(lens []int) *CartesianGenerator {
+	return &CartesianGenerator{
+		lens: lens,
+		rows: Card(lens),
+		idx:  -1,
+	}
+}
+
+// CartesianGenerator iterates over a Cartesian product set.
+type CartesianGenerator struct {
+	lens []int
+	rows int
+	idx  int
+}
+
+// Next moves to the next product of the Cartesian set.
+// It returns false once the generator has reached the end of the Cartesian set.
+func (g *CartesianGenerator) Next() bool {
+	if g.idx+1 < g.rows {
+		g.idx++
+		return true
+	}
+	g.idx = g.rows
+	return false
+}
+
+// Product generates one product of the Cartesian set according to the current
+// index, which is advanced by Next. Next must be called at least once before
+// Product, otherwise Product will panic.
+func (g *CartesianGenerator) Product(dst []int) []int {
+	return SubFor(dst, g.idx, g.lens)
+}
+
+// IdxFor converts a multi-dimensional index into a linear index for a
+// multi-dimensional space. sub specifies the index for each dimension, and dims
+// specifies the size of each dimension. IdxFor is the inverse of SubFor.
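+//
+// For example (editor's illustration), with dims = []int{2, 3, 1},
+//
+//	IdxFor([]int{1, 2, 0}, []int{2, 3, 1}) // == 1*(3*1) + 2*1 + 0 == 5
+//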
+// IdxFor panics if any of the entries of sub are negative, any of the entries
+// of dim are non-positive, or if sub[i] >= dims[i] for any i.
+func IdxFor(sub, dims []int) int {
+	// The index returned is "row-major", that is the last index of sub is
+	// continuous.
+	var idx int
+	stride := 1
+	for i := len(dims) - 1; i >= 0; i-- {
+		v := sub[i]
+		d := dims[i]
+		if d <= 0 {
+			panic(errNonpositiveDimension)
+		}
+		if v < 0 || v >= d {
+			panic("combin: invalid subscript")
+		}
+		idx += v * stride
+		stride *= d
+	}
+	return idx
+}
+
+// SubFor returns the multi-dimensional subscript for the input linear index to
+// the multi-dimensional space. dims specifies the size of each dimension, and
+// idx specifies the linear index. SubFor is the inverse of IdxFor.
+//
+// If sub is non-nil the result is stored in-place into sub, and SubFor will panic
+// if len(sub) != len(dims). If sub is nil a new slice of the appropriate length
+// is allocated. SubFor panics if idx < 0 or if idx is greater than or equal to
+// the product of the dimensions.
+func SubFor(sub []int, idx int, dims []int) []int {
+	if sub == nil {
+		sub = make([]int, len(dims))
+	}
+	if len(sub) != len(dims) {
+		panic(badInput)
+	}
+	if idx < 0 {
+		panic(errNegInput)
+	}
+	stride := 1
+	for i := len(dims) - 1; i >= 1; i-- {
+		stride *= dims[i]
+	}
+	for i := 0; i < len(dims)-1; i++ {
+		v := idx / stride
+		d := dims[i]
+		if d <= 0 {
+			panic(errNonpositiveDimension)
+		}
+		if v >= dims[i] {
+			panic("combin: index too large")
+		}
+		sub[i] = v
+		idx -= v * stride
+		stride /= dims[i+1]
+	}
+	if idx >= dims[len(sub)-1] {
+		panic("combin: index too large")
+	}
+	sub[len(sub)-1] = idx
+	return sub
+}
+
+// NumPermutations returns the number of permutations when selecting k
+// objects from a set of n objects when the selection order matters.
+// No check is made for overflow.
+//
+// NumPermutations panics if either n or k is negative, or if k is
+// greater than n.
+func NumPermutations(n, k int) int {
+	if n < 0 {
+		panic("combin: n is negative")
+	}
+	if k < 0 {
+		panic("combin: k is negative")
+	}
+	if k > n {
+		panic("combin: k is greater than n")
+	}
+	p := 1
+	for i := n - k + 1; i <= n; i++ {
+		p *= i
+	}
+	return p
+}
+
+// Permutations generates all of the permutations of k elements from a
+// set of size n. The returned slice has length NumPermutations(n, k)
+// and each inner slice has length k.
+//
+// n and k must be non-negative with n >= k, otherwise Permutations will panic.
+//
+// PermutationGenerator may alternatively be used to generate the permutations
+// iteratively instead of collectively, or IndexToPermutation for random access.
+func Permutations(n, k int) [][]int {
+	nPerms := NumPermutations(n, k)
+	data := make([][]int, nPerms)
+	if len(data) == 0 {
+		return data
+	}
+	for i := 0; i < nPerms; i++ {
+		data[i] = IndexToPermutation(nil, i, n, k)
+	}
+	return data
+}
+
+// PermutationGenerator generates permutations iteratively. The Permutations
+// function may be called to generate all permutations collectively.
+type PermutationGenerator struct {
+	n           int
+	k           int
+	nPerm       int
+	idx         int
+	permutation []int
+}
+
+// NewPermutationGenerator returns a PermutationGenerator for generating the
+// permutations of k elements from a set of size n.
+//
+// n and k must be non-negative with n >= k, otherwise NewPermutationGenerator
+// will panic.
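+//
+// Typical usage (editor's sketch):
+//
+//	gen := NewPermutationGenerator(3, 2)
+//	for gen.Next() {
+//		_ = gen.Permutation(nil) // visits all 6 ordered pairs from {0, 1, 2}
+//	}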
+func NewPermutationGenerator(n, k int) *PermutationGenerator { + return &PermutationGenerator{ + n: n, + k: k, + nPerm: NumPermutations(n, k), + idx: -1, + permutation: make([]int, k), + } +} + +// Next advances the iterator if there are permutations remaining to be generated, +// and returns false if all permutations have been generated. Next must be called +// to initialize the first value before calling Permutation or Permutation will +// panic. The value returned by Permutation is only changed during calls to Next. +func (p *PermutationGenerator) Next() bool { + if p.idx >= p.nPerm-1 { + p.idx = p.nPerm // so Permutation can panic. + return false + } + p.idx++ + IndexToPermutation(p.permutation, p.idx, p.n, p.k) + return true +} + +// Permutation returns the current permutation. If dst is non-nil, it must have +// length k and the result will be stored in-place into dst. If dst +// is nil a new slice will be allocated and returned. If all of the permutations +// have already been constructed (Next() returns false), Permutation will panic. +// +// Next must be called to initialize the first value before calling Permutation +// or Permutation will panic. The value returned by Permutation is only changed +// during calls to Next. +func (p *PermutationGenerator) Permutation(dst []int) []int { + if p.idx == p.nPerm { + panic("combin: all permutations have been generated") + } + if p.idx == -1 { + panic("combin: Permutation called before Next") + } + if dst == nil { + dst = make([]int, p.k) + } else if len(dst) != p.k { + panic(badInput) + } + copy(dst, p.permutation) + return dst +} + +// PermutationIndex returns the index of the given permutation. +// +// The functions PermutationIndex and IndexToPermutation define a bijection +// between the integers and the NumPermutations(n, k) number of possible permutations. +// PermutationIndex returns the inverse of IndexToPermutation. +// +// PermutationIndex panics if perm is not a permutation of k of the first +// [0,n) integers, if n or k are negative, or if k is greater than n. +func PermutationIndex(perm []int, n, k int) int { + if n < 0 || k < 0 { + panic(errNegInput) + } + if n < k { + panic(badSetSize) + } + if len(perm) != k { + panic("combin: bad length permutation") + } + contains := make(map[int]struct{}, k) + for _, v := range perm { + if v < 0 || v >= n { + panic("combin: bad element") + } + contains[v] = struct{}{} + } + if len(contains) != k { + panic("combin: perm contains non-unique elements") + } + if n == k { + // The permutation is the ordering of the elements. + return equalPermutationIndex(perm) + } + + // The permutation index is found by finding the combination index and the + // equalPermutation index. The combination index is found by just sorting + // the elements, and the permutation index is the ordering of the size + // of the elements. 
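+	// For example (editor's note), perm = [3 1 2] sorts to tmp = [1 2 3]
+	// with order = [2 0 1]; the final index combines CombinationIndex of
+	// tmp with the rank of order among the k! same-length orderings.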
+ tmp := make([]int, len(perm)) + copy(tmp, perm) + idx := make([]int, len(perm)) + for i := range idx { + idx[i] = i + } + s := sortInts{tmp, idx} + sort.Sort(s) + order := make([]int, len(perm)) + for i, v := range idx { + order[v] = i + } + combIdx := CombinationIndex(tmp, n, k) + permIdx := equalPermutationIndex(order) + return combIdx*NumPermutations(k, k) + permIdx +} + +type sortInts struct { + data []int + idx []int +} + +func (s sortInts) Len() int { + return len(s.data) +} + +func (s sortInts) Less(i, j int) bool { + return s.data[i] < s.data[j] +} + +func (s sortInts) Swap(i, j int) { + s.data[i], s.data[j] = s.data[j], s.data[i] + s.idx[i], s.idx[j] = s.idx[j], s.idx[i] +} + +// IndexToPermutation returns the permutation corresponding to the given index. +// +// The functions PermutationIndex and IndexToPermutation define a bijection +// between the integers and the NumPermutations(n, k) number of possible permutations. +// IndexToPermutation returns the inverse of PermutationIndex. +// +// The permutation is stored in-place into dst if dst is non-nil, otherwise +// a new slice is allocated and returned. +// +// IndexToPermutation panics if n or k are negative, if k is greater than n, +// or if idx is not in [0, NumPermutations(n,k)-1]. IndexToPermutation will also panic +// if dst is non-nil and len(dst) is not k. +func IndexToPermutation(dst []int, idx, n, k int) []int { + nPerm := NumPermutations(n, k) + if idx < 0 || idx >= nPerm { + panic("combin: invalid index") + } + if dst == nil { + dst = make([]int, k) + } else if len(dst) != k { + panic(badInput) + } + if n == k { + indexToEqualPermutation(dst, idx) + return dst + } + + // First, we index into the combination (which of the k items to choose) + // and then we index into the n == k permutation of those k items. The + // indexing acts like a matrix with nComb rows and factorial(k) columns. + kPerm := NumPermutations(k, k) + combIdx := idx / kPerm + permIdx := idx % kPerm + comb := IndexToCombination(nil, combIdx, n, k) // Gives us the set of integers. + perm := make([]int, len(dst)) + indexToEqualPermutation(perm, permIdx) // Gives their order. + for i, v := range perm { + dst[i] = comb[v] + } + return dst +} + +// equalPermutationIndex returns the index of the given permutation of the +// first k integers. +func equalPermutationIndex(perm []int) int { + // Note(btracey): This is an n^2 algorithm, but factorial increases + // very quickly (25! overflows int64) so this is not a problem in + // practice. + idx := 0 + for i, u := range perm { + less := 0 + for _, v := range perm[i:] { + if v < u { + less++ + } + } + idx += less * factorial(len(perm)-i-1) + } + return idx +} + +// indexToEqualPermutation returns the permutation for the first len(dst) +// integers for the given index. +func indexToEqualPermutation(dst []int, idx int) { + for i := range dst { + dst[i] = i + } + for i := range dst { + f := factorial(len(dst) - i - 1) + r := idx / f + v := dst[i+r] + copy(dst[i+1:i+r+1], dst[i:i+r]) + dst[i] = v + idx %= f + } +} + +// factorial returns a!. +func factorial(a int) int { + f := 1 + for i := 2; i <= a; i++ { + f *= i + } + return f +} diff --git a/vendor/gonum.org/v1/gonum/stat/combin/doc.go b/vendor/gonum.org/v1/gonum/stat/combin/doc.go new file mode 100644 index 0000000000..496045cdd3 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/combin/doc.go @@ -0,0 +1,7 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package combin implements routines involving combinatorics (permutations,
+// combinations, etc.).
+package combin // import "gonum.org/v1/gonum/stat/combin"
diff --git a/vendor/gonum.org/v1/gonum/stat/distmv/dirichlet.go b/vendor/gonum.org/v1/gonum/stat/distmv/dirichlet.go
new file mode 100644
index 0000000000..61d799884c
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/distmv/dirichlet.go
@@ -0,0 +1,149 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distmv
+
+import (
+	"math"
+	"math/rand/v2"
+
+	"gonum.org/v1/gonum/floats"
+	"gonum.org/v1/gonum/mat"
+	"gonum.org/v1/gonum/stat/distuv"
+)
+
+// Dirichlet implements the Dirichlet probability distribution.
+//
+// The Dirichlet distribution is a continuous probability distribution that
+// generates elements over the probability simplex, i.e. ||x||_1 = 1. The Dirichlet
+// distribution is the conjugate prior to the categorical distribution and the
+// multivariate version of the beta distribution. The probability of a point x is
+//
+//	1/Beta(α) \prod_i x_i^(α_i - 1)
+//
+// where Beta(α) is the multivariate Beta function (see the mathext package).
+//
+// For more information see https://en.wikipedia.org/wiki/Dirichlet_distribution
+type Dirichlet struct {
+	alpha []float64
+	dim   int
+	src   rand.Source
+
+	lbeta    float64
+	sumAlpha float64
+}
+
+// NewDirichlet creates a new Dirichlet distribution with the given parameters alpha.
+// NewDirichlet will panic if len(alpha) == 0, or if any alpha is <= 0.
+func NewDirichlet(alpha []float64, src rand.Source) *Dirichlet {
+	dim := len(alpha)
+	if dim == 0 {
+		panic(badZeroDimension)
+	}
+	for _, v := range alpha {
+		if v <= 0 {
+			panic("dirichlet: non-positive alpha")
+		}
+	}
+	a := make([]float64, len(alpha))
+	copy(a, alpha)
+	d := &Dirichlet{
+		alpha: a,
+		dim:   dim,
+		src:   src,
+	}
+	d.lbeta, d.sumAlpha = d.genLBeta(a)
+	return d
+}
+
+// CovarianceMatrix calculates the covariance matrix of the distribution,
+// storing the result in dst. Upon return, the value at element {i, j} of the
+// covariance matrix is equal to the covariance of the i^th and j^th variables.
+//
+//	covariance(i, j) = E[(x_i - E[x_i])(x_j - E[x_j])]
+//
+// If the dst matrix is empty it will be resized to the correct dimensions,
+// otherwise dst must match the dimension of the receiver or CovarianceMatrix
+// will panic.
+func (d *Dirichlet) CovarianceMatrix(dst *mat.SymDense) {
+	if dst.IsEmpty() {
+		*dst = *(dst.GrowSym(d.dim).(*mat.SymDense))
+	} else if dst.SymmetricDim() != d.dim {
+		panic("dirichlet: input matrix size mismatch")
+	}
+	scale := 1 / (d.sumAlpha * d.sumAlpha * (d.sumAlpha + 1))
+	for i := 0; i < d.dim; i++ {
+		ai := d.alpha[i]
+		v := ai * (d.sumAlpha - ai) * scale
+		dst.SetSym(i, i, v)
+		for j := i + 1; j < d.dim; j++ {
+			aj := d.alpha[j]
+			v := -ai * aj * scale
+			dst.SetSym(i, j, v)
+		}
+	}
+}
+
+// genLBeta computes the generalized LBeta function.
+func (d *Dirichlet) genLBeta(alpha []float64) (lbeta, sumAlpha float64) {
+	for _, alpha := range alpha {
+		lg, _ := math.Lgamma(alpha)
+		lbeta += lg
+		sumAlpha += alpha
+	}
+	lg, _ := math.Lgamma(sumAlpha)
+	return lbeta - lg, sumAlpha
+}
+
+// Dim returns the dimension of the distribution.
+func (d *Dirichlet) Dim() int {
+	return d.dim
+}
+
+// LogProb computes the log of the pdf of the point x.
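+// For parameters α this is
+//
+//	log p(x) = Σ_i (α_i - 1) log(x_i) - log Beta(α)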
+// +// It does not check that ||x||_1 = 1. +func (d *Dirichlet) LogProb(x []float64) float64 { + dim := d.dim + if len(x) != dim { + panic(badSizeMismatch) + } + var lprob float64 + for i, x := range x { + lprob += (d.alpha[i] - 1) * math.Log(x) + } + lprob -= d.lbeta + return lprob +} + +// Mean returns the mean of the probability distribution. +// +// If dst is not nil, the mean will be stored in-place into dst and returned, +// otherwise a new slice will be allocated first. If dst is not nil, it must +// have length equal to the dimension of the distribution. +func (d *Dirichlet) Mean(dst []float64) []float64 { + dst = reuseAs(dst, d.dim) + floats.ScaleTo(dst, 1/d.sumAlpha, d.alpha) + return dst +} + +// Prob computes the value of the probability density function at x. +func (d *Dirichlet) Prob(x []float64) float64 { + return math.Exp(d.LogProb(x)) +} + +// Rand generates a random number according to the distribution. +// +// If dst is not nil, the sample will be stored in-place into dst and returned, +// otherwise a new slice will be allocated first. If dst is not nil, it must +// have length equal to the dimension of the distribution. +func (d *Dirichlet) Rand(dst []float64) []float64 { + dst = reuseAs(dst, d.dim) + for i, alpha := range d.alpha { + dst[i] = distuv.Gamma{Alpha: alpha, Beta: 1, Src: d.src}.Rand() + } + sum := floats.Sum(dst) + floats.Scale(1/sum, dst) + return dst +} diff --git a/vendor/gonum.org/v1/gonum/stat/distmv/distmv.go b/vendor/gonum.org/v1/gonum/stat/distmv/distmv.go new file mode 100644 index 0000000000..49b67291a4 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distmv/distmv.go @@ -0,0 +1,28 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distmv + +const ( + badQuantile = "distmv: quantile not between 0 and 1" + badOutputLen = "distmv: output slice is not nil or the correct length" + badInputLength = "distmv: input slice length mismatch" + badSizeMismatch = "distmv: size mismatch" + badZeroDimension = "distmv: zero dimensional input" + nonPosDimension = "distmv: non-positive dimension input" +) + +const logTwoPi = 1.8378770664093454835606594728112352797227949472755668 + +// reuseAs returns a slice of length n. If len(dst) is n, dst is returned, +// otherwise dst must be nil or reuseAs will panic. +func reuseAs(dst []float64, n int) []float64 { + if dst == nil { + dst = make([]float64, n) + } + if len(dst) != n { + panic(badOutputLen) + } + return dst +} diff --git a/vendor/gonum.org/v1/gonum/stat/distmv/doc.go b/vendor/gonum.org/v1/gonum/stat/distmv/doc.go new file mode 100644 index 0000000000..142e056816 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distmv/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package distmv provides multivariate random distribution types. +package distmv // import "gonum.org/v1/gonum/stat/distmv" diff --git a/vendor/gonum.org/v1/gonum/stat/distmv/interfaces.go b/vendor/gonum.org/v1/gonum/stat/distmv/interfaces.go new file mode 100644 index 0000000000..04f56aa29b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distmv/interfaces.go @@ -0,0 +1,35 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+package distmv
+
+// Quantiler returns the multi-dimensional inverse cumulative distribution function.
+//
+// If x is non-nil, len(x) must equal len(p) and the quantile will be stored
+// in-place into x. If x is nil, a new slice will be allocated and returned.
+// All of the values of p must be between 0 and 1, or Quantile will panic.
+type Quantiler interface {
+	Quantile(x, p []float64) []float64
+}
+
+// LogProber computes the log of the probability of the point x.
+type LogProber interface {
+	LogProb(x []float64) float64
+}
+
+// Rander generates a random number according to the distribution.
+//
+// If the input is non-nil, len(x) must equal the dimension of the distribution,
+// otherwise Rand will panic.
+//
+// If the input is nil, a new slice will be allocated and returned.
+type Rander interface {
+	Rand(x []float64) []float64
+}
+
+// RandLogProber is both a Rander and a LogProber.
+type RandLogProber interface {
+	Rander
+	LogProber
+}
diff --git a/vendor/gonum.org/v1/gonum/stat/distmv/normal.go b/vendor/gonum.org/v1/gonum/stat/distmv/normal.go
new file mode 100644
index 0000000000..a52f37c240
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/distmv/normal.go
@@ -0,0 +1,524 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distmv
+
+import (
+	"math"
+	"math/rand/v2"
+
+	"gonum.org/v1/gonum/floats"
+	"gonum.org/v1/gonum/mat"
+	"gonum.org/v1/gonum/stat"
+	"gonum.org/v1/gonum/stat/distuv"
+)
+
+// Normal is a multivariate normal distribution (also known as the multivariate
+// Gaussian distribution). Its pdf in k dimensions is given by
+//
+//	(2 π)^(-k/2) |Σ|^(-1/2) exp(-1/2 (x-μ)'Σ^-1(x-μ))
+//
+// where μ is the mean vector and Σ the covariance matrix. Σ must be symmetric
+// and positive definite. Use NewNormal to construct.
+type Normal struct {
+	mu []float64
+
+	sigma mat.SymDense
+
+	chol       mat.Cholesky
+	logSqrtDet float64
+	dim        int
+
+	// If src is altered, rnd must be updated.
+	src rand.Source
+	rnd *rand.Rand
+}
+
+// NewNormal creates a new Normal with the given mean and covariance matrix.
+// NewNormal panics if len(mu) == 0, or if len(mu) != sigma.SymmetricDim(). If
+// the covariance matrix is not positive-definite, the returned boolean is false.
+func NewNormal(mu []float64, sigma mat.Symmetric, src rand.Source) (*Normal, bool) {
+	if len(mu) == 0 {
+		panic(badZeroDimension)
+	}
+	dim := sigma.SymmetricDim()
+	if dim != len(mu) {
+		panic(badSizeMismatch)
+	}
+	n := &Normal{
+		src: src,
+		rnd: rand.New(src),
+		dim: dim,
+		mu:  make([]float64, dim),
+	}
+	copy(n.mu, mu)
+	ok := n.chol.Factorize(sigma)
+	if !ok {
+		return nil, false
+	}
+	n.sigma = *mat.NewSymDense(dim, nil)
+	n.sigma.CopySym(sigma)
+	n.logSqrtDet = 0.5 * n.chol.LogDet()
+	return n, true
+}
+
+// NewNormalChol creates a new Normal distribution with the given mean and
+// covariance matrix represented by its Cholesky decomposition. NewNormalChol
+// panics if len(mu) is not equal to chol.SymmetricDim().
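+//
+// Editor's sketch of typical use, reusing an existing factorization of Σ:
+//
+//	var chol mat.Cholesky
+//	if chol.Factorize(sigma) {
+//		dist := NewNormalChol(mu, &chol, src)
+//		_ = dist
+//	}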
+func NewNormalChol(mu []float64, chol *mat.Cholesky, src rand.Source) *Normal { + dim := len(mu) + if dim != chol.SymmetricDim() { + panic(badSizeMismatch) + } + n := &Normal{ + src: src, + rnd: rand.New(src), + dim: dim, + mu: make([]float64, dim), + } + n.chol.Clone(chol) + copy(n.mu, mu) + n.logSqrtDet = 0.5 * n.chol.LogDet() + return n +} + +// NewNormalPrecision creates a new Normal distribution with the given mean and +// precision matrix (inverse of the covariance matrix). NewNormalPrecision +// panics if len(mu) is not equal to prec.SymmetricDim(). If the precision matrix +// is not positive-definite, NewNormalPrecision returns nil for norm and false +// for ok. +func NewNormalPrecision(mu []float64, prec *mat.SymDense, src rand.Source) (norm *Normal, ok bool) { + if len(mu) == 0 { + panic(badZeroDimension) + } + dim := prec.SymmetricDim() + if dim != len(mu) { + panic(badSizeMismatch) + } + // TODO(btracey): Computing a matrix inverse is generally numerically unstable. + // This only has to compute the inverse of a positive definite matrix, which + // is much better, but this still loses precision. It is worth considering if + // instead the precision matrix should be stored explicitly and used instead + // of the Cholesky decomposition of the covariance matrix where appropriate. + var chol mat.Cholesky + ok = chol.Factorize(prec) + if !ok { + return nil, false + } + var sigma mat.SymDense + err := chol.InverseTo(&sigma) + if err != nil { + return nil, false + } + return NewNormal(mu, &sigma, src) +} + +// ConditionNormal returns the Normal distribution that is the receiver conditioned +// on the input evidence. The returned multivariate normal has dimension +// n - len(observed), where n is the dimension of the original receiver. The updated +// mean and covariance are +// +// mu = mu_un + sigma_{ob,un}ᵀ * sigma_{ob,ob}^-1 (v - mu_ob) +// sigma = sigma_{un,un} - sigma_{ob,un}ᵀ * sigma_{ob,ob}^-1 * sigma_{ob,un} +// +// where mu_un and mu_ob are the original means of the unobserved and observed +// variables respectively, sigma_{un,un} is the unobserved subset of the covariance +// matrix, sigma_{ob,ob} is the observed subset of the covariance matrix, and +// sigma_{un,ob} are the cross terms. The elements of x_2 have been observed with +// values v. The dimension order is preserved during conditioning, so if the value +// of dimension 1 is observed, the returned normal represents dimensions {0, 2, ...} +// of the original Normal distribution. +// +// ConditionNormal returns {nil, false} if there is a failure during the update. +// Mathematically this is impossible, but can occur with finite precision arithmetic. +func (n *Normal) ConditionNormal(observed []int, values []float64, src rand.Source) (*Normal, bool) { + if len(observed) == 0 { + panic("normal: no observed value") + } + if len(observed) != len(values) { + panic(badInputLength) + } + for _, v := range observed { + if v < 0 || v >= n.Dim() { + panic("normal: observed value out of bounds") + } + } + + _, mu1, sigma11 := studentsTConditional(observed, values, math.Inf(1), n.mu, &n.sigma) + if mu1 == nil { + return nil, false + } + return NewNormal(mu1, sigma11, src) +} + +// CovarianceMatrix stores the covariance matrix of the distribution in dst. +// Upon return, the value at element {i, j} of the covariance matrix is equal +// to the covariance of the i^th and j^th variables. 
+//
+//	covariance(i, j) = E[(x_i - E[x_i])(x_j - E[x_j])]
+//
+// If the dst matrix is empty it will be resized to the correct dimensions,
+// otherwise dst must match the dimension of the receiver or CovarianceMatrix
+// will panic.
+func (n *Normal) CovarianceMatrix(dst *mat.SymDense) {
+	if dst.IsEmpty() {
+		*dst = *(dst.GrowSym(n.dim).(*mat.SymDense))
+	} else if dst.SymmetricDim() != n.dim {
+		panic("normal: input matrix size mismatch")
+	}
+	dst.CopySym(&n.sigma)
+}
+
+// Dim returns the dimension of the distribution.
+func (n *Normal) Dim() int {
+	return n.dim
+}
+
+// Entropy returns the differential entropy of the distribution.
+func (n *Normal) Entropy() float64 {
+	return float64(n.dim)/2*(1+logTwoPi) + n.logSqrtDet
+}
+
+// LogProb computes the log of the pdf of the point x.
+func (n *Normal) LogProb(x []float64) float64 {
+	dim := n.dim
+	if len(x) != dim {
+		panic(badSizeMismatch)
+	}
+	return normalLogProb(x, n.mu, &n.chol, n.logSqrtDet)
+}
+
+// NormalLogProb computes the log probability of the location x for a Normal
+// distribution with the given mean and Cholesky decomposition of the covariance
+// matrix. NormalLogProb panics if len(x) is not equal to len(mu), or if
+// len(mu) != chol.SymmetricDim().
+//
+// This function saves time and memory if the Cholesky decomposition is already
+// available. Otherwise, the NewNormal function should be used.
+func NormalLogProb(x, mu []float64, chol *mat.Cholesky) float64 {
+	dim := len(mu)
+	if len(x) != dim {
+		panic(badSizeMismatch)
+	}
+	if chol.SymmetricDim() != dim {
+		panic(badSizeMismatch)
+	}
+	logSqrtDet := 0.5 * chol.LogDet()
+	return normalLogProb(x, mu, chol, logSqrtDet)
+}
+
+// normalLogProb is the same as NormalLogProb, but does not make size checks and
+// additionally requires the precomputed value log(|Σ|^0.5).
+func normalLogProb(x, mu []float64, chol *mat.Cholesky, logSqrtDet float64) float64 {
+	dim := len(mu)
+	c := -0.5*float64(dim)*logTwoPi - logSqrtDet
+	dst := stat.Mahalanobis(mat.NewVecDense(dim, x), mat.NewVecDense(dim, mu), chol)
+	return c - 0.5*dst*dst
+}
+
+// MarginalNormal returns the marginal distribution of the given input variables.
+// That is, MarginalNormal returns
+//
+//	p(x_i) = \int_{x_o} p(x_i | x_o) p(x_o) dx_o
+//
+// where x_i are the dimensions in the input, and x_o are the remaining dimensions.
+// See https://en.wikipedia.org/wiki/Marginal_distribution for more information.
+//
+// The input src is passed to the call to NewNormal.
+func (n *Normal) MarginalNormal(vars []int, src rand.Source) (*Normal, bool) {
+	newMean := make([]float64, len(vars))
+	for i, v := range vars {
+		newMean[i] = n.mu[v]
+	}
+	var s mat.SymDense
+	s.SubsetSym(&n.sigma, vars)
+	return NewNormal(newMean, &s, src)
+}
+
+// MarginalNormalSingle returns the marginal of the given input variable.
+// That is, MarginalNormalSingle returns
+//
+//	p(x_i) = \int_{x_¬i} p(x_i | x_¬i) p(x_¬i) dx_¬i
+//
+// where i is the input index.
+// See https://en.wikipedia.org/wiki/Marginal_distribution for more information.
+//
+// The input src is passed to the constructed distuv.Normal.
+func (n *Normal) MarginalNormalSingle(i int, src rand.Source) distuv.Normal {
+	return distuv.Normal{
+		Mu:    n.mu[i],
+		Sigma: math.Sqrt(n.sigma.At(i, i)),
+		Src:   src,
+	}
+}
+
+// Mean returns the mean of the probability distribution.
+//
+// If dst is not nil, the mean will be stored in-place into dst and returned,
+// otherwise a new slice will be allocated first. If dst is not nil, it must
+// have length equal to the dimension of the distribution.
+func (n *Normal) Mean(dst []float64) []float64 { + dst = reuseAs(dst, n.dim) + copy(dst, n.mu) + return dst +} + +// Prob computes the value of the probability density function at x. +func (n *Normal) Prob(x []float64) float64 { + return math.Exp(n.LogProb(x)) +} + +// Quantile returns the value of the multi-dimensional inverse cumulative +// distribution function at p. +// +// If dst is not nil, the quantile will be stored in-place into dst and +// returned, otherwise a new slice will be allocated first. If dst is not nil, +// it must have length equal to the dimension of the distribution. Quantile will +// also panic if the length of p is not equal to the dimension of the +// distribution. +// +// All of the values of p must be between 0 and 1, inclusive, or Quantile will +// panic. +func (n *Normal) Quantile(dst, p []float64) []float64 { + if len(p) != n.dim { + panic(badInputLength) + } + dst = reuseAs(dst, n.dim) + + // Transform to a standard normal and then transform to a multivariate Gaussian. + for i, v := range p { + dst[i] = distuv.UnitNormal.Quantile(v) + } + n.TransformNormal(dst, dst) + return dst +} + +// Rand generates a random sample according to the distribution. +// +// If dst is not nil, the sample will be stored in-place into dst and returned, +// otherwise a new slice will be allocated first. If dst is not nil, it must +// have length equal to the dimension of the distribution. +func (n *Normal) Rand(dst []float64) []float64 { + return NormalRand(dst, n.mu, &n.chol, n.src) +} + +// NormalRand generates a random sample from a multivariate normal distribution +// given by the mean and the Cholesky factorization of the covariance matrix. +// +// If dst is not nil, the sample will be stored in-place into dst and returned, +// otherwise a new slice will be allocated first. If dst is not nil, it must +// have length equal to the dimension of the distribution. +// +// This function saves time and memory if the Cholesky factorization is already +// available. Otherwise, the NewNormal function should be used. +func NormalRand(dst, mean []float64, chol *mat.Cholesky, src rand.Source) []float64 { + if len(mean) != chol.SymmetricDim() { + panic(badInputLength) + } + dst = reuseAs(dst, len(mean)) + + if src == nil { + for i := range dst { + dst[i] = rand.NormFloat64() + } + } else { + rnd := rand.New(src) + for i := range dst { + dst[i] = rnd.NormFloat64() + } + } + transformNormal(dst, dst, mean, chol) + return dst +} + +// EigenSym is an eigendecomposition of a symmetric matrix. +type EigenSym interface { + mat.Symmetric + // RawValues returns all eigenvalues in ascending order. The returned slice + // must not be modified. + RawValues() []float64 + // RawQ returns an orthogonal matrix whose columns contain the eigenvectors. + // The returned matrix must not be modified. + RawQ() mat.Matrix +} + +// PositivePartEigenSym is an EigenSym that sets any negative eigenvalues from +// the given eigendecomposition to zero but otherwise returns the values +// unchanged. +// +// This is useful for filtering eigenvalues of positive semi-definite matrices +// that are almost zero but negative due to rounding errors. +type PositivePartEigenSym struct { + ed *mat.EigenSym + vals []float64 +} + +var _ EigenSym = (*PositivePartEigenSym)(nil) +var _ EigenSym = (*mat.EigenSym)(nil) + +// NewPositivePartEigenSym returns a new PositivePartEigenSym, wrapping the +// given eigendecomposition. 
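+//
+// Editor's sketch of intended use together with NormalRandCov:
+//
+//	var ed mat.EigenSym
+//	if ed.Factorize(cov, true) {
+//		x := NormalRandCov(nil, mu, NewPositivePartEigenSym(&ed), src)
+//		_ = x
+//	}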
+func NewPositivePartEigenSym(ed *mat.EigenSym) *PositivePartEigenSym { + n := ed.SymmetricDim() + vals := make([]float64, n) + for i, lamda := range ed.RawValues() { + if lamda > 0 { + vals[i] = lamda + } + } + return &PositivePartEigenSym{ + ed: ed, + vals: vals, + } +} + +// SymmetricDim returns the value from the wrapped eigendecomposition. +func (ed *PositivePartEigenSym) SymmetricDim() int { return ed.ed.SymmetricDim() } + +// Dims returns the dimensions from the wrapped eigendecomposition. +func (ed *PositivePartEigenSym) Dims() (r, c int) { return ed.ed.Dims() } + +// At returns the value from the wrapped eigendecomposition. +func (ed *PositivePartEigenSym) At(i, j int) float64 { return ed.ed.At(i, j) } + +// T returns the transpose from the wrapped eigendecomposition. +func (ed *PositivePartEigenSym) T() mat.Matrix { return ed.ed.T() } + +// RawQ returns the orthogonal matrix Q from the wrapped eigendecomposition. The +// returned matrix must not be modified. +func (ed *PositivePartEigenSym) RawQ() mat.Matrix { return ed.ed.RawQ() } + +// RawValues returns the eigenvalues from the wrapped eigendecomposition in +// ascending order with any negative value replaced by zero. The returned slice +// must not be modified. +func (ed *PositivePartEigenSym) RawValues() []float64 { return ed.vals } + +// NormalRandCov generates a random sample from a multivariate normal +// distribution given by the mean and the covariance matrix. +// +// If dst is not nil, the sample will be stored in-place into dst and returned, +// otherwise a new slice will be allocated first. If dst is not nil, it must +// have length equal to the dimension of the distribution. +// +// cov should be *mat.Cholesky, *mat.PivotedCholesky or EigenSym, otherwise +// NormalRandCov will be very inefficient because a pivoted Cholesky +// factorization of cov will be computed for every sample. +// +// If cov is an EigenSym, all eigenvalues returned by RawValues must be +// non-negative, otherwise NormalRandCov will panic. +func NormalRandCov(dst, mean []float64, cov mat.Symmetric, src rand.Source) []float64 { + n := len(mean) + if cov.SymmetricDim() != n { + panic(badInputLength) + } + dst = reuseAs(dst, n) + if src == nil { + for i := range dst { + dst[i] = rand.NormFloat64() + } + } else { + rnd := rand.New(src) + for i := range dst { + dst[i] = rnd.NormFloat64() + } + } + + switch cov := cov.(type) { + case *mat.Cholesky: + dstVec := mat.NewVecDense(n, dst) + dstVec.MulVec(cov.RawU().T(), dstVec) + case *mat.PivotedCholesky: + dstVec := mat.NewVecDense(n, dst) + dstVec.MulVec(cov.RawU().T(), dstVec) + dstVec.Permute(cov.ColumnPivots(nil), true) + case EigenSym: + vals := cov.RawValues() + if vals[0] < 0 { + panic("distmv: covariance matrix is not positive semi-definite") + } + for i, val := range vals { + dst[i] *= math.Sqrt(val) + } + dstVec := mat.NewVecDense(n, dst) + dstVec.MulVec(cov.RawQ(), dstVec) + default: + var chol mat.PivotedCholesky + chol.Factorize(cov, -1) + dstVec := mat.NewVecDense(n, dst) + dstVec.MulVec(chol.RawU().T(), dstVec) + dstVec.Permute(chol.ColumnPivots(nil), true) + } + floats.Add(dst, mean) + + return dst +} + +// ScoreInput returns the gradient of the log-probability with respect to the +// input x. That is, ScoreInput computes +// +// ∇_x log(p(x)) +// +// If dst is not nil, the score will be stored in-place into dst and returned, +// otherwise a new slice will be allocated first. If dst is not nil, it must +// have length equal to the dimension of the distribution. 
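+//
+// For the Normal distribution this gradient is -Σ^-1 (x - μ).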
+func (n *Normal) ScoreInput(dst, x []float64) []float64 { + // Normal log probability is + // c - 0.5*(x-μ)' Σ^-1 (x-μ). + // So the derivative is just + // -Σ^-1 (x-μ). + if len(x) != n.Dim() { + panic(badInputLength) + } + dst = reuseAs(dst, n.dim) + + floats.SubTo(dst, x, n.mu) + dstVec := mat.NewVecDense(len(dst), dst) + err := n.chol.SolveVecTo(dstVec, dstVec) + if err != nil { + panic(err) + } + floats.Scale(-1, dst) + return dst +} + +// SetMean changes the mean of the normal distribution. SetMean panics if len(mu) +// does not equal the dimension of the normal distribution. +func (n *Normal) SetMean(mu []float64) { + if len(mu) != n.Dim() { + panic(badSizeMismatch) + } + copy(n.mu, mu) +} + +// TransformNormal transforms x generated from a standard multivariate normal +// into a vector that has been generated under the normal distribution of the +// receiver. +// +// If dst is not nil, the result will be stored in-place into dst and returned, +// otherwise a new slice will be allocated first. If dst is not nil, it must +// have length equal to the dimension of the distribution. TransformNormal will +// also panic if the length of x is not equal to the dimension of the receiver. +func (n *Normal) TransformNormal(dst, x []float64) []float64 { + if len(x) != n.dim { + panic(badInputLength) + } + dst = reuseAs(dst, n.dim) + transformNormal(dst, x, n.mu, &n.chol) + return dst +} + +// transformNormal performs the same operation as Normal.TransformNormal except +// no safety checks are performed and all memory must be provided. +func transformNormal(dst, normal, mu []float64, chol *mat.Cholesky) []float64 { + dim := len(mu) + dstVec := mat.NewVecDense(dim, dst) + srcVec := mat.NewVecDense(dim, normal) + // If dst and normal are the same slice, make them the same Vector otherwise + // mat complains about being tricky. + if &normal[0] == &dst[0] { + srcVec = dstVec + } + dstVec.MulVec(chol.RawU().T(), srcVec) + floats.Add(dst, mu) + return dst +} diff --git a/vendor/gonum.org/v1/gonum/stat/distmv/statdist.go b/vendor/gonum.org/v1/gonum/stat/distmv/statdist.go new file mode 100644 index 0000000000..c835924c6e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distmv/statdist.go @@ -0,0 +1,390 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distmv + +import ( + "math" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/mat" + "gonum.org/v1/gonum/mathext" + "gonum.org/v1/gonum/spatial/r1" + "gonum.org/v1/gonum/stat" +) + +// Bhattacharyya is a type for computing the Bhattacharyya distance between +// probability distributions. +// +// The Bhattacharyya distance is defined as +// +// D_B = -ln(BC(l,r)) +// BC = \int_-∞^∞ (p(x)q(x))^(1/2) dx +// +// Where BC is known as the Bhattacharyya coefficient. +// The Bhattacharyya distance is related to the Hellinger distance by +// +// H(l,r) = sqrt(1-BC(l,r)) +// +// For more information, see +// +// https://en.wikipedia.org/wiki/Bhattacharyya_distance +type Bhattacharyya struct{} + +// DistNormal computes the Bhattacharyya distance between normal distributions l and r. +// The dimensions of the input distributions must match or DistNormal will panic. 
+// +// For Normal distributions, the Bhattacharyya distance is +// +// Σ = (Σ_l + Σ_r)/2 +// D_B = (1/8)*(μ_l - μ_r)ᵀ*Σ^-1*(μ_l - μ_r) + (1/2)*ln(det(Σ)/(det(Σ_l)*det(Σ_r))^(1/2)) +func (Bhattacharyya) DistNormal(l, r *Normal) float64 { + dim := l.Dim() + if dim != r.Dim() { + panic(badSizeMismatch) + } + + var sigma mat.SymDense + sigma.AddSym(&l.sigma, &r.sigma) + sigma.ScaleSym(0.5, &sigma) + + var chol mat.Cholesky + chol.Factorize(&sigma) + + mahalanobis := stat.Mahalanobis(mat.NewVecDense(dim, l.mu), mat.NewVecDense(dim, r.mu), &chol) + mahalanobisSq := mahalanobis * mahalanobis + + dl := l.chol.LogDet() + dr := r.chol.LogDet() + ds := chol.LogDet() + + return 0.125*mahalanobisSq + 0.5*ds - 0.25*dl - 0.25*dr +} + +// DistUniform computes the Bhattacharyya distance between uniform distributions l and r. +// The dimensions of the input distributions must match or DistUniform will panic. +func (Bhattacharyya) DistUniform(l, r *Uniform) float64 { + if len(l.bounds) != len(r.bounds) { + panic(badSizeMismatch) + } + // BC = \int \sqrt(p(x)q(x)), which for uniform distributions is a constant + // over the volume where both distributions have positive probability. + // Compute the overlap and the value of sqrt(p(x)q(x)). The entropy is the + // negative log probability of the distribution (use instead of LogProb so + // it is not necessary to construct an x value). + // + // BC = volume * sqrt(p(x)q(x)) + // logBC = log(volume) + 0.5*(logP + logQ) + // D_B = -logBC + return -unifLogVolOverlap(l.bounds, r.bounds) + 0.5*(l.Entropy()+r.Entropy()) +} + +// unifLogVolOverlap computes the log of the volume of the hyper-rectangle where +// both uniform distributions have positive probability. +func unifLogVolOverlap(b1, b2 []r1.Interval) float64 { + var logVolOverlap float64 + for dim, v1 := range b1 { + v2 := b2[dim] + // If the surfaces don't overlap, then the volume is 0 + if v1.Max <= v2.Min || v2.Max <= v1.Min { + return math.Inf(-1) + } + vol := math.Min(v1.Max, v2.Max) - math.Max(v1.Min, v2.Min) + logVolOverlap += math.Log(vol) + } + return logVolOverlap +} + +// CrossEntropy is a type for computing the cross-entropy between probability +// distributions. +// +// The cross-entropy is defined as +// - \int_x l(x) log(r(x)) dx = KL(l || r) + H(l) +// +// where KL is the Kullback-Leibler divergence and H is the entropy. +// For more information, see +// +// https://en.wikipedia.org/wiki/Cross_entropy +type CrossEntropy struct{} + +// DistNormal returns the cross-entropy between normal distributions l and r. +// The dimensions of the input distributions must match or DistNormal will panic. +func (CrossEntropy) DistNormal(l, r *Normal) float64 { + if l.Dim() != r.Dim() { + panic(badSizeMismatch) + } + kl := KullbackLeibler{}.DistNormal(l, r) + return kl + l.Entropy() +} + +// Hellinger is a type for computing the Hellinger distance between probability +// distributions. +// +// The Hellinger distance is defined as +// +// H^2(l,r) = 1/2 * int_x (\sqrt(l(x)) - \sqrt(r(x)))^2 dx +// +// and is bounded between 0 and 1. Note the above formula defines the squared +// Hellinger distance, while this returns the Hellinger distance itself. +// The Hellinger distance is related to the Bhattacharyya distance by +// +// H^2 = 1 - exp(-D_B) +// +// For more information, see +// +// https://en.wikipedia.org/wiki/Hellinger_distance +type Hellinger struct{} + +// DistNormal returns the Hellinger distance between normal distributions l and r. 
+// The dimensions of the input distributions must match or DistNormal will panic. +// +// See the documentation of Bhattacharyya.DistNormal for the formula for Normal +// distributions. +func (Hellinger) DistNormal(l, r *Normal) float64 { + if l.Dim() != r.Dim() { + panic(badSizeMismatch) + } + db := Bhattacharyya{}.DistNormal(l, r) + bc := math.Exp(-db) + return math.Sqrt(1 - bc) +} + +// KullbackLeibler is a type for computing the Kullback-Leibler divergence from l to r. +// +// The Kullback-Leibler divergence is defined as +// +// D_KL(l || r ) = \int_x p(x) log(p(x)/q(x)) dx +// +// Note that the Kullback-Leibler divergence is not symmetric with respect to +// the order of the input arguments. +type KullbackLeibler struct{} + +// DistDirichlet returns the Kullback-Leibler divergence between Dirichlet +// distributions l and r. The dimensions of the input distributions must match +// or DistDirichlet will panic. +// +// For two Dirichlet distributions, the KL divergence is computed as +// +// D_KL(l || r) = log Γ(α_0_l) - \sum_i log Γ(α_i_l) - log Γ(α_0_r) + \sum_i log Γ(α_i_r) +// + \sum_i (α_i_l - α_i_r)(ψ(α_i_l)- ψ(α_0_l)) +// +// Where Γ is the gamma function, ψ is the digamma function, and α_0 is the +// sum of the Dirichlet parameters. +func (KullbackLeibler) DistDirichlet(l, r *Dirichlet) float64 { + // http://bariskurt.com/kullback-leibler-divergence-between-two-dirichlet-and-beta-distributions/ + if l.Dim() != r.Dim() { + panic(badSizeMismatch) + } + l0, _ := math.Lgamma(l.sumAlpha) + r0, _ := math.Lgamma(r.sumAlpha) + dl := mathext.Digamma(l.sumAlpha) + + var l1, r1, c float64 + for i, al := range l.alpha { + ar := r.alpha[i] + vl, _ := math.Lgamma(al) + l1 += vl + vr, _ := math.Lgamma(ar) + r1 += vr + c += (al - ar) * (mathext.Digamma(al) - dl) + } + return l0 - l1 - r0 + r1 + c +} + +// DistNormal returns the KullbackLeibler divergence between normal distributions l and r. +// The dimensions of the input distributions must match or DistNormal will panic. +// +// For two normal distributions, the KL divergence is computed as +// +// D_KL(l || r) = 0.5*[ln(|Σ_r|) - ln(|Σ_l|) + (μ_l - μ_r)ᵀ*Σ_r^-1*(μ_l - μ_r) + tr(Σ_r^-1*Σ_l)-d] +func (KullbackLeibler) DistNormal(l, r *Normal) float64 { + dim := l.Dim() + if dim != r.Dim() { + panic(badSizeMismatch) + } + + mahalanobis := stat.Mahalanobis(mat.NewVecDense(dim, l.mu), mat.NewVecDense(dim, r.mu), &r.chol) + mahalanobisSq := mahalanobis * mahalanobis + + // TODO(btracey): Optimize where there is a SolveCholeskySym + // TODO(btracey): There may be a more efficient way to just compute the trace + // Compute tr(Σ_r^-1*Σ_l) using the fact that Σ_l = Uᵀ * U + var u mat.TriDense + l.chol.UTo(&u) + var m mat.Dense + err := r.chol.SolveTo(&m, u.T()) + if err != nil { + return math.NaN() + } + m.Mul(&m, &u) + tr := mat.Trace(&m) + + return r.logSqrtDet - l.logSqrtDet + 0.5*(mahalanobisSq+tr-float64(l.dim)) +} + +// DistUniform returns the KullbackLeibler divergence between uniform distributions +// l and r. The dimensions of the input distributions must match or DistUniform +// will panic. +func (KullbackLeibler) DistUniform(l, r *Uniform) float64 { + bl := l.Bounds(nil) + br := r.Bounds(nil) + if len(bl) != len(br) { + panic(badSizeMismatch) + } + + // The KL is ∞ if l is not completely contained within r, because then + // r(x) is zero when l(x) is non-zero for some x. 
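+ // For example, if l is uniform on [0, 1] and r is uniform on [0, 2], the
+ // divergence is ln(2); with the arguments swapped it is +Inf, since l
+ // would then not be contained in r.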
+ contained := true
+ for i, v := range bl {
+ if v.Min < br[i].Min || br[i].Max < v.Max {
+ contained = false
+ break
+ }
+ }
+ if !contained {
+ return math.Inf(1)
+ }
+
+ // The KL divergence is finite.
+ //
+ // KL defines 0*ln(0) = 0, so there is no contribution to KL where l(x) = 0.
+ // Inside the region, l(x) and r(x) are constant (uniform distribution), and
+ // this constant is integrated over l(x), which integrates out to one.
+ // The entropy is -log(p(x)).
+ logPx := -l.Entropy()
+ logQx := -r.Entropy()
+ return logPx - logQx
+}
+
+// Renyi is a type for computing the Rényi divergence of order α from l to r.
+//
+// The Rényi divergence with α > 0, α ≠ 1 is defined as
+//
+// D_α(l || r) = 1/(α-1) log(\int_-∞^∞ l(x)^α r(x)^(1-α)dx)
+//
+// The Rényi divergence has special forms for α = 0 and α = 1. This type does
+// not implement α = ∞. For α = 0,
+//
+// D_0(l || r) = -log \int_-∞^∞ r(x) 1{l(x)>0} dx
+//
+// that is, the negative log probability under r(x) that l(x) > 0.
+// When α = 1, the Rényi divergence is equal to the Kullback-Leibler divergence.
+// The Rényi divergence is also equal to twice the Bhattacharyya distance when α = 0.5.
+//
+// The parameter α must be in 0 ≤ α < ∞ or the distance functions will panic.
+type Renyi struct {
+ Alpha float64
+}
+
+// DistNormal returns the Rényi divergence between normal distributions l and r.
+// The dimensions of the input distributions must match or DistNormal will panic.
+//
+// For two normal distributions, the Rényi divergence is computed as
+//
+// Σ_α = (1-α) Σ_l + αΣ_r
+// D_α(l||r) = α/2 * (μ_l - μ_r)'*Σ_α^-1*(μ_l - μ_r) + 1/(2(1-α))*ln(|Σ_α|/(|Σ_l|^(1-α)*|Σ_r|^α))
+//
+// For a more nicely formatted version of the formula, see Eq. 15 of
+//
+// Kolchinsky, Artemy, and Brendan D. Tracey. "Estimating Mixture Entropy
+// with Pairwise Distances." arXiv preprint arXiv:1706.02419 (2017).
+//
+// Note that this formula is for the Chernoff divergence, which differs from
+// the Rényi divergence by a factor of 1-α. Also be aware that most sources in
+// the literature report this formula incorrectly.
+func (renyi Renyi) DistNormal(l, r *Normal) float64 {
+ if renyi.Alpha < 0 {
+ panic("renyi: alpha < 0")
+ }
+ dim := l.Dim()
+ if dim != r.Dim() {
+ panic(badSizeMismatch)
+ }
+ if renyi.Alpha == 0 {
+ return 0
+ }
+ if renyi.Alpha == 1 {
+ return KullbackLeibler{}.DistNormal(l, r)
+ }
+
+ logDetL := l.chol.LogDet()
+ logDetR := r.chol.LogDet()
+
+ // Σ_α = (1-α)Σ_l + αΣ_r.
+ sigA := mat.NewSymDense(dim, nil)
+ for i := 0; i < dim; i++ {
+ for j := i; j < dim; j++ {
+ v := (1-renyi.Alpha)*l.sigma.At(i, j) + renyi.Alpha*r.sigma.At(i, j)
+ sigA.SetSym(i, j, v)
+ }
+ }
+
+ var chol mat.Cholesky
+ ok := chol.Factorize(sigA)
+ if !ok {
+ return math.NaN()
+ }
+ logDetA := chol.LogDet()
+
+ mahalanobis := stat.Mahalanobis(mat.NewVecDense(dim, l.mu), mat.NewVecDense(dim, r.mu), &chol)
+ mahalanobisSq := mahalanobis * mahalanobis
+
+ return (renyi.Alpha/2)*mahalanobisSq + 1/(2*(1-renyi.Alpha))*(logDetA-(1-renyi.Alpha)*logDetL-renyi.Alpha*logDetR)
+}
+
+// Wasserstein is a type for computing the Wasserstein distance between two
+// probability distributions.
+//
+// The Wasserstein distance is defined as
+//
+// W(l,r) := inf 𝔼(||X-Y||_2^2)^1/2
+//
+// where the infimum is taken over all joint distributions of (X, Y) with
+// marginals l and r, respectively.
+//
+// For more information, see
+//
+// https://en.wikipedia.org/wiki/Wasserstein_metric
+type Wasserstein struct{}
+
+// DistNormal returns the Wasserstein distance between normal distributions l and r.
+// The dimensions of the input distributions must match or DistNormal will panic.
+// +// The Wasserstein distance for Normal distributions is +// +// d^2 = ||m_l - m_r||_2^2 + Tr(Σ_l + Σ_r - 2(Σ_l^(1/2)*Σ_r*Σ_l^(1/2))^(1/2)) +// +// For more information, see +// +// http://djalil.chafai.net/blog/2010/04/30/wasserstein-distance-between-two-gaussians/ +func (Wasserstein) DistNormal(l, r *Normal) float64 { + dim := l.Dim() + if dim != r.Dim() { + panic(badSizeMismatch) + } + + d := floats.Distance(l.mu, r.mu, 2) + d = d * d + + // Compute Σ_l^(1/2) + var ssl mat.SymDense + err := ssl.PowPSD(&l.sigma, 0.5) + if err != nil { + panic(err) + } + // Compute Σ_l^(1/2)*Σ_r*Σ_l^(1/2) + var mean mat.Dense + mean.Mul(&ssl, &r.sigma) + mean.Mul(&mean, &ssl) + + // Reinterpret as symdense, and take Σ^(1/2) + meanSym := mat.NewSymDense(dim, mean.RawMatrix().Data) + err = ssl.PowPSD(meanSym, 0.5) + if err != nil { + panic(err) + } + + tr := mat.Trace(&r.sigma) + tl := mat.Trace(&l.sigma) + tm := mat.Trace(&ssl) + + return d + tl + tr - 2*tm +} diff --git a/vendor/gonum.org/v1/gonum/stat/distmv/studentst.go b/vendor/gonum.org/v1/gonum/stat/distmv/studentst.go new file mode 100644 index 0000000000..7dee85b6ae --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distmv/studentst.go @@ -0,0 +1,362 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distmv + +import ( + "math" + "math/rand/v2" + "sort" + + "golang.org/x/tools/container/intsets" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/mat" + "gonum.org/v1/gonum/stat" + "gonum.org/v1/gonum/stat/distuv" +) + +// StudentsT is a multivariate Student's T distribution. It is a distribution over +// ℝ^n with the probability density +// +// p(y) = (Γ((ν+n)/2) / Γ(ν/2)) * (νπ)^(-n/2) * |Ʃ|^(-1/2) * +// (1 + 1/ν * (y-μ)ᵀ * Ʃ^-1 * (y-μ))^(-(ν+n)/2) +// +// where ν is a scalar greater than 2, μ is a vector in ℝ^n, and Ʃ is an n×n +// symmetric positive definite matrix. +// +// In this distribution, ν sets the spread of the distribution, similar to +// the degrees of freedom in a univariate Student's T distribution. As ν → ∞, +// the distribution approaches a multi-variate normal distribution. +// μ is the mean of the distribution, and the covariance is ν/(ν-2)*Ʃ. +// +// See https://en.wikipedia.org/wiki/Student%27s_t-distribution and +// http://users.isy.liu.se/en/rt/roth/student.pdf for more information. +type StudentsT struct { + nu float64 + mu []float64 + // If src is altered, rnd must be updated. + src rand.Source + rnd *rand.Rand + + sigma mat.SymDense // only stored if needed + + chol mat.Cholesky + lower mat.TriDense + logSqrtDet float64 + dim int +} + +// NewStudentsT creates a new StudentsT with the given nu, mu, and sigma +// parameters. +// +// NewStudentsT panics if len(mu) == 0, or if len(mu) != sigma.SymmetricDim(). If +// the covariance matrix is not positive-definite, nil is returned and ok is false. 
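+//
+// A minimal construction sketch (parameter values illustrative only):
+//
+//	sigma := mat.NewSymDense(2, []float64{1, 0.5, 0.5, 2})
+//	dist, ok := NewStudentsT([]float64{0, 0}, sigma, 5, nil)
+//	if !ok {
+//		panic("covariance not positive definite")
+//	}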
+func NewStudentsT(mu []float64, sigma mat.Symmetric, nu float64, src rand.Source) (dist *StudentsT, ok bool) { + if len(mu) == 0 { + panic(badZeroDimension) + } + dim := sigma.SymmetricDim() + if dim != len(mu) { + panic(badSizeMismatch) + } + + s := &StudentsT{ + nu: nu, + mu: make([]float64, dim), + dim: dim, + src: src, + } + if src != nil { + s.rnd = rand.New(src) + } + copy(s.mu, mu) + + ok = s.chol.Factorize(sigma) + if !ok { + return nil, false + } + s.sigma = *mat.NewSymDense(dim, nil) + s.sigma.CopySym(sigma) + s.chol.LTo(&s.lower) + s.logSqrtDet = 0.5 * s.chol.LogDet() + return s, true +} + +// ConditionStudentsT returns the Student's T distribution that is the receiver +// conditioned on the input evidence, and the success of the operation. +// The returned Student's T has dimension +// n - len(observed), where n is the dimension of the original receiver. +// The dimension order is preserved during conditioning, so if the value +// of dimension 1 is observed, the returned normal represents dimensions {0, 2, ...} +// of the original Student's T distribution. +// +// ok indicates whether there was a failure during the update. If ok is false +// the operation failed and dist is not usable. +// Mathematically this is impossible, but can occur with finite precision arithmetic. +func (s *StudentsT) ConditionStudentsT(observed []int, values []float64, src rand.Source) (dist *StudentsT, ok bool) { + if len(observed) == 0 { + panic("studentst: no observed value") + } + if len(observed) != len(values) { + panic(badInputLength) + } + + for _, v := range observed { + if v < 0 || v >= s.dim { + panic("studentst: observed value out of bounds") + } + } + + newNu, newMean, newSigma := studentsTConditional(observed, values, s.nu, s.mu, &s.sigma) + if newMean == nil { + return nil, false + } + + return NewStudentsT(newMean, newSigma, newNu, src) + +} + +// studentsTConditional updates a Student's T distribution based on the observed samples +// (see documentation for the public function). The Gaussian conditional update +// is treated as a special case when nu == math.Inf(1). +func studentsTConditional(observed []int, values []float64, nu float64, mu []float64, sigma mat.Symmetric) (newNu float64, newMean []float64, newSigma *mat.SymDense) { + dim := len(mu) + ob := len(observed) + + unobserved := findUnob(observed, dim) + + unob := len(unobserved) + if unob == 0 { + panic("stat: all dimensions observed") + } + + mu1 := make([]float64, unob) + for i, v := range unobserved { + mu1[i] = mu[v] + } + mu2 := make([]float64, ob) // really v - mu2 + for i, v := range observed { + mu2[i] = values[i] - mu[v] + } + + var sigma11, sigma22 mat.SymDense + sigma11.SubsetSym(sigma, unobserved) + sigma22.SubsetSym(sigma, observed) + + sigma21 := mat.NewDense(ob, unob, nil) + for i, r := range observed { + for j, c := range unobserved { + v := sigma.At(r, c) + sigma21.Set(i, j, v) + } + } + + var chol mat.Cholesky + ok := chol.Factorize(&sigma22) + if !ok { + return math.NaN(), nil, nil + } + + // Compute mu_1 + sigma_{2,1}ᵀ * sigma_{2,2}^-1 (v - mu_2). + v := mat.NewVecDense(ob, mu2) + var tmp, tmp2 mat.VecDense + err := chol.SolveVecTo(&tmp, v) + if err != nil { + return math.NaN(), nil, nil + } + tmp2.MulVec(sigma21.T(), &tmp) + + for i := range mu1 { + mu1[i] += tmp2.At(i, 0) + } + + // Compute tmp4 = sigma_{2,1}ᵀ * sigma_{2,2}^-1 * sigma_{2,1}. + // TODO(btracey): Should this be a method of SymDense? 
+ var tmp3, tmp4 mat.Dense
+ err = chol.SolveTo(&tmp3, sigma21)
+ if err != nil {
+ return math.NaN(), nil, nil
+ }
+ tmp4.Mul(sigma21.T(), &tmp3)
+
+ // Compute sigma_{1,1} - tmp4
+ // TODO(btracey): If tmp4 can be constructed with a method, then this can be
+ // replaced with SubSym.
+ for i := 0; i < len(unobserved); i++ {
+ for j := i; j < len(unobserved); j++ {
+ v := sigma11.At(i, j)
+ sigma11.SetSym(i, j, v-tmp4.At(i, j))
+ }
+ }
+
+ // The computed variables are accurate for a Normal.
+ if math.IsInf(nu, 1) {
+ return nu, mu1, &sigma11
+ }
+
+ // Compute beta = (v - mu_2)ᵀ * sigma_{2,2}^-1 * (v - mu_2)
+ beta := mat.Dot(v, &tmp)
+
+ // Scale the covariance matrix
+ sigma11.ScaleSym((nu+beta)/(nu+float64(ob)), &sigma11)
+
+ return nu + float64(ob), mu1, &sigma11
+}
+
+// findUnob returns the unobserved variables (the complementary set to observed).
+// findUnob panics if any value is repeated in observed.
+func findUnob(observed []int, dim int) (unobserved []int) {
+ var setOb intsets.Sparse
+ for _, v := range observed {
+ setOb.Insert(v)
+ }
+ var setAll intsets.Sparse
+ for i := 0; i < dim; i++ {
+ setAll.Insert(i)
+ }
+ var setUnob intsets.Sparse
+ setUnob.Difference(&setAll, &setOb)
+ unobserved = setUnob.AppendTo(nil)
+ sort.Ints(unobserved)
+ return unobserved
+}
+
+// CovarianceMatrix calculates the covariance matrix of the distribution,
+// storing the result in dst. Upon return, the value at element {i, j} of the
+// covariance matrix is equal to the covariance of the i^th and j^th variables.
+//
+// covariance(i, j) = E[(x_i - E[x_i])(x_j - E[x_j])]
+//
+// If the dst matrix is empty it will be resized to the correct dimensions,
+// otherwise dst must match the dimension of the receiver or CovarianceMatrix
+// will panic.
+func (st *StudentsT) CovarianceMatrix(dst *mat.SymDense) {
+ if dst.IsEmpty() {
+ *dst = *(dst.GrowSym(st.dim).(*mat.SymDense))
+ } else if dst.SymmetricDim() != st.dim {
+ panic("studentst: input matrix size mismatch")
+ }
+ dst.CopySym(&st.sigma)
+ dst.ScaleSym(st.nu/(st.nu-2), dst)
+}
+
+// Dim returns the dimension of the distribution.
+func (s *StudentsT) Dim() int {
+ return s.dim
+}
+
+// LogProb computes the log of the pdf of the point y.
+func (s *StudentsT) LogProb(y []float64) float64 {
+ if len(y) != s.dim {
+ panic(badInputLength)
+ }
+
+ nu := s.nu
+ n := float64(s.dim)
+ lg1, _ := math.Lgamma((nu + n) / 2)
+ lg2, _ := math.Lgamma(nu / 2)
+
+ t1 := lg1 - lg2 - n/2*math.Log(nu*math.Pi) - s.logSqrtDet
+
+ mahal := stat.Mahalanobis(mat.NewVecDense(len(y), y), mat.NewVecDense(len(s.mu), s.mu), &s.chol)
+ mahal *= mahal
+ return t1 - ((nu+n)/2)*math.Log(1+mahal/nu)
+}
+
+// MarginalStudentsT returns the marginal distribution of the given input variables,
+// and the success of the operation.
+// That is, MarginalStudentsT returns
+//
+// p(x_i) = \int_{x_o} p(x_i | x_o) p(x_o) dx_o
+//
+// where x_i are the dimensions in the input, and x_o are the remaining dimensions.
+// See https://en.wikipedia.org/wiki/Marginal_distribution for more information.
+//
+// The input src is passed to the created StudentsT.
+//
+// ok indicates whether there was a failure during the marginalization. If ok is false
+// the operation failed and dist is not usable.
+// Mathematically this is impossible, but can occur with finite precision arithmetic.
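+//
+// For example, to marginalize onto the first two dimensions (a sketch, with s
+// an existing *StudentsT of higher dimension):
+//
+//	marg, ok := s.MarginalStudentsT([]int{0, 1}, nil)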
+func (s *StudentsT) MarginalStudentsT(vars []int, src rand.Source) (dist *StudentsT, ok bool) { + newMean := make([]float64, len(vars)) + for i, v := range vars { + newMean[i] = s.mu[v] + } + var newSigma mat.SymDense + newSigma.SubsetSym(&s.sigma, vars) + return NewStudentsT(newMean, &newSigma, s.nu, src) +} + +// MarginalStudentsTSingle returns the marginal distribution of the given input variable. +// That is, MarginalStudentsTSingle returns +// +// p(x_i) = \int_{x_o} p(x_i | x_o) p(x_o) dx_o +// +// where i is the input index, and x_o are the remaining dimensions. +// See https://en.wikipedia.org/wiki/Marginal_distribution for more information. +// +// The input src is passed to the call to NewStudentsT. +func (s *StudentsT) MarginalStudentsTSingle(i int, src rand.Source) distuv.StudentsT { + return distuv.StudentsT{ + Mu: s.mu[i], + Sigma: math.Sqrt(s.sigma.At(i, i)), + Nu: s.nu, + Src: src, + } +} + +// TODO(btracey): Implement marginal single. Need to modify univariate StudentsT +// to be three-parameter. + +// Mean returns the mean of the probability distribution. +// +// If dst is not nil, the mean will be stored in-place into dst and returned, +// otherwise a new slice will be allocated first. If dst is not nil, it must +// have length equal to the dimension of the distribution. +func (s *StudentsT) Mean(dst []float64) []float64 { + dst = reuseAs(dst, s.dim) + copy(dst, s.mu) + return dst +} + +// Nu returns the degrees of freedom parameter of the distribution. +func (s *StudentsT) Nu() float64 { + return s.nu +} + +// Prob computes the value of the probability density function at x. +func (s *StudentsT) Prob(y []float64) float64 { + return math.Exp(s.LogProb(y)) +} + +// Rand generates a random sample according to the distribution. +// +// If dst is not nil, the sample will be stored in-place into dst and returned, +// otherwise a new slice will be allocated first. If dst is not nil, it must +// have length equal to the dimension of the distribution. +func (s *StudentsT) Rand(dst []float64) []float64 { + // If Y is distributed according to N(0,Sigma), and U is chi^2 with + // parameter ν, then + // X = mu + Y * sqrt(nu / U) + // X is distributed according to this distribution. + + // Generate Y. + dst = reuseAs(dst, s.dim) + if s.rnd == nil { + for i := range dst { + dst[i] = rand.NormFloat64() + } + } else { + for i := range dst { + dst[i] = s.rnd.NormFloat64() + } + } + y := mat.NewVecDense(s.dim, dst) + y.MulVec(&s.lower, y) + // Compute mu + Y*sqrt(nu/U) + u := distuv.ChiSquared{K: s.nu, Src: s.src}.Rand() + floats.AddScaledTo(dst, s.mu, math.Sqrt(s.nu/u), dst) + return dst +} diff --git a/vendor/gonum.org/v1/gonum/stat/distmv/uniform.go b/vendor/gonum.org/v1/gonum/stat/distmv/uniform.go new file mode 100644 index 0000000000..81d8cba6d7 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distmv/uniform.go @@ -0,0 +1,200 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distmv + +import ( + "math" + "math/rand/v2" + + "gonum.org/v1/gonum/spatial/r1" +) + +// Uniform represents a multivariate uniform distribution. +type Uniform struct { + bounds []r1.Interval + dim int + rnd *rand.Rand +} + +// NewUniform creates a new uniform distribution with the given bounds. 
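+//
+// A minimal sketch (bounds illustrative only):
+//
+//	u := NewUniform([]r1.Interval{{Min: 0, Max: 1}, {Min: -1, Max: 1}}, nil)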
+func NewUniform(bnds []r1.Interval, src rand.Source) *Uniform {
+ dim := len(bnds)
+ if dim == 0 {
+ panic(badZeroDimension)
+ }
+ for _, b := range bnds {
+ if b.Max < b.Min {
+ panic("uniform: maximum less than minimum")
+ }
+ }
+ u := &Uniform{
+ bounds: make([]r1.Interval, dim),
+ dim: dim,
+ }
+ if src != nil {
+ u.rnd = rand.New(src)
+ }
+ for i, b := range bnds {
+ u.bounds[i].Min = b.Min
+ u.bounds[i].Max = b.Max
+ }
+ return u
+}
+
+// NewUnitUniform creates a new Uniform distribution over the dim-dimensional
+// unit hypercube. That is, a uniform distribution where each dimension has
+// Min = 0 and Max = 1.
+func NewUnitUniform(dim int, src rand.Source) *Uniform {
+ if dim <= 0 {
+ panic(nonPosDimension)
+ }
+ bounds := make([]r1.Interval, dim)
+ for i := range bounds {
+ bounds[i].Min = 0
+ bounds[i].Max = 1
+ }
+ u := Uniform{
+ bounds: bounds,
+ dim: dim,
+ }
+ if src != nil {
+ u.rnd = rand.New(src)
+ }
+ return &u
+}
+
+// Bounds returns the bounds on the variables of the distribution.
+//
+// If dst is not nil, the bounds will be stored in-place into dst and returned,
+// otherwise a new slice will be allocated first. If dst is not nil, it must
+// have length equal to the dimension of the distribution.
+func (u *Uniform) Bounds(dst []r1.Interval) []r1.Interval {
+ if dst == nil {
+ dst = make([]r1.Interval, u.Dim())
+ }
+ if len(dst) != u.Dim() {
+ panic(badInputLength)
+ }
+ copy(dst, u.bounds)
+ return dst
+}
+
+// CDF returns the value of the multidimensional cumulative distribution
+// function of the probability distribution at the point x.
+//
+// If dst is not nil, the value will be stored in-place into dst and returned,
+// otherwise a new slice will be allocated first. If dst is not nil, it must
+// have length equal to the dimension of the distribution. CDF will also panic
+// if the length of x is not equal to the dimension of the distribution.
+func (u *Uniform) CDF(dst, x []float64) []float64 {
+ if len(x) != u.dim {
+ panic(badSizeMismatch)
+ }
+ dst = reuseAs(dst, u.dim)
+
+ for i, v := range x {
+ if v < u.bounds[i].Min {
+ dst[i] = 0
+ } else if v > u.bounds[i].Max {
+ dst[i] = 1
+ } else {
+ dst[i] = (v - u.bounds[i].Min) / (u.bounds[i].Max - u.bounds[i].Min)
+ }
+ }
+ return dst
+}
+
+// Dim returns the dimension of the distribution.
+func (u *Uniform) Dim() int {
+ return u.dim
+}
+
+// Entropy returns the differential entropy of the distribution.
+func (u *Uniform) Entropy() float64 {
+ // Entropy is the log of the volume.
+ var logVol float64
+ for _, b := range u.bounds {
+ logVol += math.Log(b.Max - b.Min)
+ }
+ return logVol
+}
+
+// LogProb computes the log of the pdf of the point x.
+func (u *Uniform) LogProb(x []float64) float64 {
+ dim := u.dim
+ if len(x) != dim {
+ panic(badSizeMismatch)
+ }
+ var logprob float64
+ for i, b := range u.bounds {
+ if x[i] < b.Min || x[i] > b.Max {
+ return math.Inf(-1)
+ }
+ logprob -= math.Log(b.Max - b.Min)
+ }
+ return logprob
+}
+
+// Mean returns the mean of the probability distribution.
+//
+// If dst is not nil, the mean will be stored in-place into dst and returned,
+// otherwise a new slice will be allocated first. If dst is not nil, it must
+// have length equal to the dimension of the distribution.
+func (u *Uniform) Mean(dst []float64) []float64 {
+ dst = reuseAs(dst, u.dim)
+ for i, b := range u.bounds {
+ dst[i] = (b.Max + b.Min) / 2
+ }
+ return dst
+}
+
+// Prob computes the value of the probability density function at x.
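+// For a box of volume V the density is 1/V at any point inside the bounds and
+// 0 outside; for example, on [0, 1] × [-1, 1] Prob returns 0.5 inside and 0
+// outside.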
+func (u *Uniform) Prob(x []float64) float64 {
+ return math.Exp(u.LogProb(x))
+}
+
+// Rand generates a random sample according to the distribution.
+//
+// If dst is not nil, the sample will be stored in-place into dst and returned,
+// otherwise a new slice will be allocated first. If dst is not nil, it must
+// have length equal to the dimension of the distribution.
+func (u *Uniform) Rand(dst []float64) []float64 {
+ dst = reuseAs(dst, u.dim)
+ if u.rnd == nil {
+ for i, b := range u.bounds {
+ dst[i] = rand.Float64()*(b.Max-b.Min) + b.Min
+ }
+ return dst
+ }
+ for i, b := range u.bounds {
+ dst[i] = u.rnd.Float64()*(b.Max-b.Min) + b.Min
+ }
+ return dst
+}
+
+// Quantile returns the value of the multi-dimensional inverse cumulative
+// distribution function at p.
+//
+// If dst is not nil, the quantile will be stored in-place into dst and
+// returned, otherwise a new slice will be allocated first. If dst is not nil,
+// it must have length equal to the dimension of the distribution. Quantile will
+// also panic if the length of p is not equal to the dimension of the
+// distribution.
+//
+// All of the values of p must be between 0 and 1, inclusive, or Quantile will
+// panic.
+func (u *Uniform) Quantile(dst, p []float64) []float64 {
+ if len(p) != u.dim {
+ panic(badSizeMismatch)
+ }
+ dst = reuseAs(dst, u.dim)
+ for i, v := range p {
+ if v < 0 || v > 1 {
+ panic(badQuantile)
+ }
+ dst[i] = v*(u.bounds[i].Max-u.bounds[i].Min) + u.bounds[i].Min
+ }
+ return dst
+}
diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/alphastable.go b/vendor/gonum.org/v1/gonum/stat/distuv/alphastable.go
new file mode 100644
index 0000000000..20ffeac620
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/distuv/alphastable.go
@@ -0,0 +1,112 @@
+// Copyright ©2020 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distuv
+
+import (
+ "math"
+ "math/rand/v2"
+)
+
+// AlphaStable represents an α-stable distribution with four parameters.
+// See https://en.wikipedia.org/wiki/Stable_distribution for more information.
+type AlphaStable struct {
+ // Alpha is the stability parameter.
+ // It is valid within the range 0 < α ≤ 2.
+ Alpha float64
+ // Beta is the skewness parameter.
+ // It is valid within the range -1 ≤ β ≤ 1.
+ Beta float64
+ // C is the scale parameter.
+ // It is valid when positive.
+ C float64
+ // Mu is the location parameter.
+ Mu float64
+ Src rand.Source
+}
+
+// ExKurtosis returns the excess kurtosis of the distribution.
+// ExKurtosis returns NaN when Alpha != 2.
+func (a AlphaStable) ExKurtosis() float64 {
+ if a.Alpha == 2 {
+ return 0
+ }
+ return math.NaN()
+}
+
+// Mean returns the mean of the probability distribution.
+// Mean returns NaN when Alpha <= 1.
+func (a AlphaStable) Mean() float64 {
+ if a.Alpha > 1 {
+ return a.Mu
+ }
+ return math.NaN()
+}
+
+// Median returns the median of the distribution.
+// Median panics when Beta != 0, because then the median is not analytically
+// expressible.
+func (a AlphaStable) Median() float64 {
+ if a.Beta == 0 {
+ return a.Mu
+ }
+ panic("distuv: cannot compute Median for Beta != 0")
+}
+
+// Mode returns the mode of the distribution.
+// Mode panics when Beta != 0, because then the mode is not analytically
+// expressible.
+func (a AlphaStable) Mode() float64 {
+ if a.Beta == 0 {
+ return a.Mu
+ }
+ panic("distuv: cannot compute Mode for Beta != 0")
+}
+
+// NumParameters returns the number of parameters in the distribution.
+func (a AlphaStable) NumParameters() int { + return 4 +} + +// Rand returns a random sample drawn from the distribution. +func (a AlphaStable) Rand() float64 { + // From https://en.wikipedia.org/wiki/Stable_distribution#Simulation_of_stable_variables + const halfPi = math.Pi / 2 + u := Uniform{-halfPi, halfPi, a.Src}.Rand() + w := Exponential{1, a.Src}.Rand() + if a.Alpha == 1 { + f := halfPi + a.Beta*u + x := (f*math.Tan(u) - a.Beta*math.Log(halfPi*w*math.Cos(u)/f)) / halfPi + return a.C*(x+a.Beta*math.Log(a.C)/halfPi) + a.Mu + } + zeta := -a.Beta * math.Tan(halfPi*a.Alpha) + xi := math.Atan(-zeta) / a.Alpha + f := a.Alpha * (u + xi) + g := math.Sqrt(1+zeta*zeta) * math.Pow(math.Cos(u-f)/w, 1-a.Alpha) / math.Cos(u) + x := math.Pow(g, 1/a.Alpha) * math.Sin(f) + return a.C*x + a.Mu +} + +// Skewness returns the skewness of the distribution. +// Skewness returns NaN when Alpha != 2. +func (a AlphaStable) Skewness() float64 { + if a.Alpha == 2 { + return 0 + } + return math.NaN() +} + +// StdDev returns the standard deviation of the probability distribution. +func (a AlphaStable) StdDev() float64 { + return math.Sqrt(a.Variance()) +} + +// Variance returns the variance of the probability distribution. +// Variance returns +Inf when Alpha != 2. +func (a AlphaStable) Variance() float64 { + if a.Alpha == 2 { + return 2 * a.C * a.C + } + return math.Inf(1) +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/bernoulli.go b/vendor/gonum.org/v1/gonum/stat/distuv/bernoulli.go new file mode 100644 index 0000000000..b220afae28 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/bernoulli.go @@ -0,0 +1,140 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" +) + +// Bernoulli represents a random variable whose value is 1 with probability p and +// value of zero with probability 1-P. The value of P must be between 0 and 1. +// More information at https://en.wikipedia.org/wiki/Bernoulli_distribution. +type Bernoulli struct { + P float64 + Src rand.Source +} + +// CDF computes the value of the cumulative density function at x. +func (b Bernoulli) CDF(x float64) float64 { + if x < 0 { + return 0 + } + if x < 1 { + return 1 - b.P + } + return 1 +} + +// Entropy returns the entropy of the distribution. +func (b Bernoulli) Entropy() float64 { + if b.P == 0 || b.P == 1 { + return 0 + } + q := 1 - b.P + return -b.P*math.Log(b.P) - q*math.Log(q) +} + +// ExKurtosis returns the excess kurtosis of the distribution. +func (b Bernoulli) ExKurtosis() float64 { + pq := b.P * (1 - b.P) + return (1 - 6*pq) / pq +} + +// LogProb computes the natural logarithm of the value of the probability density function at x. +func (b Bernoulli) LogProb(x float64) float64 { + if x == 0 { + return math.Log(1 - b.P) + } + if x == 1 { + return math.Log(b.P) + } + return math.Inf(-1) +} + +// Mean returns the mean of the probability distribution. +func (b Bernoulli) Mean() float64 { + return b.P +} + +// Median returns the median of the probability distribution. +func (b Bernoulli) Median() float64 { + p := b.P + switch { + case p < 0.5: + return 0 + case p > 0.5: + return 1 + default: + return 0.5 + } +} + +// NumParameters returns the number of parameters in the distribution. +func (Bernoulli) NumParameters() int { + return 1 +} + +// Prob computes the value of the probability distribution at x. 
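+// For example, with P = 0.3, Prob(1) = 0.3, Prob(0) = 0.7, and Prob is 0 at
+// any other value.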
+func (b Bernoulli) Prob(x float64) float64 { + if x == 0 { + return 1 - b.P + } + if x == 1 { + return b.P + } + return 0 +} + +// Quantile returns the minimum value of x from amongst all those values whose CDF value exceeds or equals p. +func (b Bernoulli) Quantile(p float64) float64 { + if p < 0 || 1 < p { + panic(badPercentile) + } + if p <= 1-b.P { + return 0 + } + return 1 +} + +// Rand returns a random sample drawn from the distribution. +func (b Bernoulli) Rand() float64 { + var rnd float64 + if b.Src == nil { + rnd = rand.Float64() + } else { + rnd = rand.New(b.Src).Float64() + } + if rnd < b.P { + return 1 + } + return 0 +} + +// Skewness returns the skewness of the distribution. +func (b Bernoulli) Skewness() float64 { + return (1 - 2*b.P) / math.Sqrt(b.P*(1-b.P)) +} + +// StdDev returns the standard deviation of the probability distribution. +func (b Bernoulli) StdDev() float64 { + return math.Sqrt(b.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (b Bernoulli) Survival(x float64) float64 { + if x < 0 { + return 1 + } + if x < 1 { + return b.P + } + return 0 +} + +// Variance returns the variance of the probability distribution. +func (b Bernoulli) Variance() float64 { + return b.P * (1 - b.P) +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/beta.go b/vendor/gonum.org/v1/gonum/stat/distuv/beta.go new file mode 100644 index 0000000000..93e9343b6b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/beta.go @@ -0,0 +1,151 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" + + "gonum.org/v1/gonum/mathext" +) + +// Beta implements the Beta distribution, a two-parameter continuous distribution +// with support between 0 and 1. +// +// The beta distribution has density function +// +// x^(α-1) * (1-x)^(β-1) * Γ(α+β) / (Γ(α)*Γ(β)) +// +// For more information, see https://en.wikipedia.org/wiki/Beta_distribution +type Beta struct { + // Alpha is the left shape parameter of the distribution. Alpha must be greater + // than 0. + Alpha float64 + // Beta is the right shape parameter of the distribution. Beta must be greater + // than 0. + Beta float64 + + Src rand.Source +} + +// CDF computes the value of the cumulative distribution function at x. +func (b Beta) CDF(x float64) float64 { + if x <= 0 { + return 0 + } + if x >= 1 { + return 1 + } + return mathext.RegIncBeta(b.Alpha, b.Beta, x) +} + +// Entropy returns the differential entropy of the distribution. +func (b Beta) Entropy() float64 { + if b.Alpha <= 0 || b.Beta <= 0 { + panic("beta: negative parameters") + } + return mathext.Lbeta(b.Alpha, b.Beta) - (b.Alpha-1)*mathext.Digamma(b.Alpha) - + (b.Beta-1)*mathext.Digamma(b.Beta) + (b.Alpha+b.Beta-2)*mathext.Digamma(b.Alpha+b.Beta) +} + +// ExKurtosis returns the excess kurtosis of the distribution. +func (b Beta) ExKurtosis() float64 { + num := 6 * ((b.Alpha-b.Beta)*(b.Alpha-b.Beta)*(b.Alpha+b.Beta+1) - b.Alpha*b.Beta*(b.Alpha+b.Beta+2)) + den := b.Alpha * b.Beta * (b.Alpha + b.Beta + 2) * (b.Alpha + b.Beta + 3) + return num / den +} + +// LogProb computes the natural logarithm of the value of the probability +// density function at x. 
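+//
+// As a spot check, Alpha = Beta = 1 recovers the uniform distribution on
+// [0, 1], for which LogProb is 0 at every x in [0, 1].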
+func (b Beta) LogProb(x float64) float64 { + if x < 0 || x > 1 { + return math.Inf(-1) + } + + if b.Alpha <= 0 || b.Beta <= 0 { + panic("beta: negative parameters") + } + + lab, _ := math.Lgamma(b.Alpha + b.Beta) + la, _ := math.Lgamma(b.Alpha) + lb, _ := math.Lgamma(b.Beta) + var lx float64 + if b.Alpha != 1 { + lx = (b.Alpha - 1) * math.Log(x) + } + var l1mx float64 + if b.Beta != 1 { + l1mx = (b.Beta - 1) * math.Log(1-x) + } + return lab - la - lb + lx + l1mx +} + +// Mean returns the mean of the probability distribution. +func (b Beta) Mean() float64 { + return b.Alpha / (b.Alpha + b.Beta) +} + +// Mode returns the mode of the distribution. +// +// Mode returns NaN if both parameters are less than or equal to 1 as a special case, +// 0 if only Alpha <= 1 and 1 if only Beta <= 1. +func (b Beta) Mode() float64 { + if b.Alpha <= 1 { + if b.Beta <= 1 { + return math.NaN() + } + return 0 + } + if b.Beta <= 1 { + return 1 + } + return (b.Alpha - 1) / (b.Alpha + b.Beta - 2) +} + +// NumParameters returns the number of parameters in the distribution. +func (b Beta) NumParameters() int { + return 2 +} + +// Prob computes the value of the probability density function at x. +func (b Beta) Prob(x float64) float64 { + return math.Exp(b.LogProb(x)) +} + +// Quantile returns the inverse of the cumulative distribution function. +func (b Beta) Quantile(p float64) float64 { + if p < 0 || p > 1 { + panic(badPercentile) + } + return mathext.InvRegIncBeta(b.Alpha, b.Beta, p) +} + +// Rand returns a random sample drawn from the distribution. +func (b Beta) Rand() float64 { + ga := Gamma{Alpha: b.Alpha, Beta: 1, Src: b.Src}.Rand() + gb := Gamma{Alpha: b.Beta, Beta: 1, Src: b.Src}.Rand() + return ga / (ga + gb) +} + +// StdDev returns the standard deviation of the probability distribution. +func (b Beta) StdDev() float64 { + return math.Sqrt(b.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (b Beta) Survival(x float64) float64 { + switch { + case x <= 0: + return 1 + case x >= 1: + return 0 + } + return mathext.RegIncBeta(b.Beta, b.Alpha, 1-x) +} + +// Variance returns the variance of the probability distribution. +func (b Beta) Variance() float64 { + return b.Alpha * b.Beta / ((b.Alpha + b.Beta) * (b.Alpha + b.Beta) * (b.Alpha + b.Beta + 1)) +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/binomial.go b/vendor/gonum.org/v1/gonum/stat/distuv/binomial.go new file mode 100644 index 0000000000..4f5f6c7e1f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/binomial.go @@ -0,0 +1,189 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" + + "gonum.org/v1/gonum/mathext" + "gonum.org/v1/gonum/stat/combin" +) + +// Binomial implements the binomial distribution, a discrete probability distribution +// that expresses the probability of a given number of successful Bernoulli trials +// out of a total of n, each with success probability p. +// The binomial distribution has the density function: +// +// f(k) = (n choose k) p^k (1-p)^(n-k) +// +// For more information, see https://en.wikipedia.org/wiki/Binomial_distribution. +type Binomial struct { + // N is the total number of Bernoulli trials. N must be greater than 0. + N float64 + // P is the probability of success in any given trial. P must be in [0, 1]. 
+ P float64 + + Src rand.Source +} + +// CDF computes the value of the cumulative distribution function at x. +func (b Binomial) CDF(x float64) float64 { + if x < 0 { + return 0 + } + if x >= b.N { + return 1 + } + x = math.Floor(x) + return mathext.RegIncBeta(b.N-x, x+1, 1-b.P) +} + +// ExKurtosis returns the excess kurtosis of the distribution. +func (b Binomial) ExKurtosis() float64 { + v := b.P * (1 - b.P) + return (1 - 6*v) / (b.N * v) +} + +// LogProb computes the natural logarithm of the value of the probability +// density function at x. +func (b Binomial) LogProb(x float64) float64 { + if x < 0 || x > b.N || math.Floor(x) != x { + return math.Inf(-1) + } + lb := combin.LogGeneralizedBinomial(b.N, x) + return lb + x*math.Log(b.P) + (b.N-x)*math.Log(1-b.P) +} + +// Mean returns the mean of the probability distribution. +func (b Binomial) Mean() float64 { + return b.N * b.P +} + +// NumParameters returns the number of parameters in the distribution. +func (Binomial) NumParameters() int { + return 2 +} + +// Prob computes the value of the probability density function at x. +func (b Binomial) Prob(x float64) float64 { + return math.Exp(b.LogProb(x)) +} + +// Rand returns a random sample drawn from the distribution. +func (b Binomial) Rand() float64 { + // NUMERICAL RECIPES IN C: THE ART OF SCIENTIFIC COMPUTING (ISBN 0-521-43108-5) + // p. 295-6 + // http://www.aip.de/groups/soe/local/numres/bookcpdf/c7-3.pdf + + runif := rand.Float64 + rexp := rand.ExpFloat64 + if b.Src != nil { + rnd := rand.New(b.Src) + runif = rnd.Float64 + rexp = rnd.ExpFloat64 + } + + p := b.P + if p > 0.5 { + p = 1 - p + } + am := b.N * p + + if b.N < 25 { + // Use direct method. + bnl := 0.0 + for i := 0; i < int(b.N); i++ { + if runif() < p { + bnl++ + } + } + if p != b.P { + return b.N - bnl + } + return bnl + } + + if am < 1 { + // Use rejection method with Poisson proposal. + const logM = 2.6e-2 // constant for rejection sampling (https://en.wikipedia.org/wiki/Rejection_sampling) + var bnl float64 + z := -p + pclog := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z) // Padé approximant of log(1 + x) + for { + bnl = 0.0 + t := 0.0 + for i := 0; i < int(b.N); i++ { + t += rexp() + if t >= am { + break + } + bnl++ + } + bnlc := b.N - bnl + z = -bnl / b.N + log1p := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z) + t = (bnlc+0.5)*log1p + bnl - bnlc*pclog + 1/(12*bnlc) - am + logM // Uses Stirling's expansion of log(n!) + if rexp() >= t { + break + } + } + if p != b.P { + return b.N - bnl + } + return bnl + } + // Original algorithm samples from a Poisson distribution with the + // appropriate expected value. However, the Poisson approximation is + // asymptotic such that the absolute deviation in probability is O(1/n). + // Rejection sampling produces exact variates with at worst less than 3% + // rejection with minimal additional computation. + + // Use rejection method with Cauchy proposal. + g, _ := math.Lgamma(b.N + 1) + plog := math.Log(p) + pclog := math.Log1p(-p) + sq := math.Sqrt(2 * am * (1 - p)) + for { + var em, y float64 + for { + y = math.Tan(math.Pi * runif()) + em = sq*y + am + if em >= 0 && em < b.N+1 { + break + } + } + em = math.Floor(em) + lg1, _ := math.Lgamma(em + 1) + lg2, _ := math.Lgamma(b.N - em + 1) + t := 1.2 * sq * (1 + y*y) * math.Exp(g-lg1-lg2+em*plog+(b.N-em)*pclog) + if runif() <= t { + if p != b.P { + return b.N - em + } + return em + } + } +} + +// Skewness returns the skewness of the distribution. 
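+// For N trials with success probability P, the skewness is
+// (1-2P)/sqrt(NP(1-P)).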
+func (b Binomial) Skewness() float64 { + return (1 - 2*b.P) / b.StdDev() +} + +// StdDev returns the standard deviation of the probability distribution. +func (b Binomial) StdDev() float64 { + return math.Sqrt(b.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (b Binomial) Survival(x float64) float64 { + return 1 - b.CDF(x) +} + +// Variance returns the variance of the probability distribution. +func (b Binomial) Variance() float64 { + return b.N * b.P * (1 - b.P) +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/categorical.go b/vendor/gonum.org/v1/gonum/stat/distuv/categorical.go new file mode 100644 index 0000000000..f4b77ee49f --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/categorical.go @@ -0,0 +1,184 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" +) + +// Categorical is an extension of the Bernoulli distribution where x takes +// values {0, 1, ..., len(w)-1} where w is the weight vector. Categorical must +// be initialized with NewCategorical. +type Categorical struct { + weights []float64 + + // heap is a weight heap. + // + // It keeps a heap-organised sum of remaining + // index weights that are available to be taken + // from. + // + // Each element holds the sum of weights for + // the corresponding index, plus the sum of + // its children's weights; the children of + // an element i can be found at positions + // 2*(i+1)-1 and 2*(i+1). The root of the + // weight heap is at element 0. + // + // See comments in container/heap for an + // explanation of the layout of a heap. + heap []float64 + + src rand.Source +} + +// NewCategorical constructs a new categorical distribution where the probability +// that x equals i is proportional to w[i]. All of the weights must be +// nonnegative, and at least one of the weights must be positive. +func NewCategorical(w []float64, src rand.Source) Categorical { + c := Categorical{ + weights: make([]float64, len(w)), + heap: make([]float64, len(w)), + src: src, + } + c.ReweightAll(w) + return c +} + +// CDF computes the value of the cumulative density function at x. +func (c Categorical) CDF(x float64) float64 { + var cdf float64 + for i, w := range c.weights { + if x < float64(i) { + break + } + cdf += w + } + return cdf / c.heap[0] +} + +// Entropy returns the entropy of the distribution. +func (c Categorical) Entropy() float64 { + var ent float64 + for _, w := range c.weights { + if w == 0 { + continue + } + p := w / c.heap[0] + ent += p * math.Log(p) + } + return -ent +} + +// Len returns the number of values x could possibly take (the length of the +// initial supplied weight vector). +func (c Categorical) Len() int { + return len(c.weights) +} + +// Mean returns the mean of the probability distribution. +func (c Categorical) Mean() float64 { + var mean float64 + for i, v := range c.weights { + mean += float64(i) * v + } + return mean / c.heap[0] +} + +// Prob computes the value of the probability density function at x. +func (c Categorical) Prob(x float64) float64 { + xi := int(x) + if float64(xi) != x { + return 0 + } + if xi < 0 || xi > len(c.weights)-1 { + return 0 + } + return c.weights[xi] / c.heap[0] +} + +// LogProb computes the natural logarithm of the value of the probability density function at x. 
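+// For example, with weights {1, 3}, LogProb(1) = log(3/4), and LogProb is
+// -Inf at any value outside {0, 1}.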
+func (c Categorical) LogProb(x float64) float64 { + return math.Log(c.Prob(x)) +} + +// Rand returns a random draw from the categorical distribution. +func (c Categorical) Rand() float64 { + var r float64 + if c.src == nil { + r = c.heap[0] * rand.Float64() + } else { + r = c.heap[0] * rand.New(c.src).Float64() + } + i := 1 + last := -1 + left := len(c.weights) + for { + if r -= c.weights[i-1]; r <= 0 { + break // Fall within item i-1. + } + i <<= 1 // Move to left child. + if d := c.heap[i-1]; r > d { + r -= d + // If enough r to pass left child, + // move to right child state will + // be caught at break above. + i++ + } + if i == last || left < 0 { + panic("categorical: bad sample") + } + last = i + left-- + } + return float64(i - 1) +} + +// Reweight sets the weight of item idx to w. The input weight must be +// non-negative, and after reweighting at least one of the weights must be +// positive. +func (c Categorical) Reweight(idx int, w float64) { + if w < 0 { + panic("categorical: negative weight") + } + w, c.weights[idx] = c.weights[idx]-w, w + idx++ + for idx > 0 { + c.heap[idx-1] -= w + idx >>= 1 + } + if c.heap[0] <= 0 { + panic("categorical: sum of the weights non-positive") + } +} + +// ReweightAll resets the weights of the distribution. ReweightAll panics if +// len(w) != c.Len. All of the weights must be nonnegative, and at least one of +// the weights must be positive. +func (c Categorical) ReweightAll(w []float64) { + if len(w) != c.Len() { + panic("categorical: length of the slices do not match") + } + for _, v := range w { + if v < 0 { + panic("categorical: negative weight") + } + } + copy(c.weights, w) + c.reset() +} + +func (c Categorical) reset() { + copy(c.heap, c.weights) + for i := len(c.heap) - 1; i > 0; i-- { + // Sometimes 1-based counting makes sense. + c.heap[((i+1)>>1)-1] += c.heap[i] + } + // TODO(btracey): Renormalization for weird weights? + if c.heap[0] <= 0 { + panic("categorical: sum of the weights non-positive") + } +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/chi.go b/vendor/gonum.org/v1/gonum/stat/distuv/chi.go new file mode 100644 index 0000000000..105f529653 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/chi.go @@ -0,0 +1,124 @@ +// Copyright ©2021 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" + + "gonum.org/v1/gonum/mathext" +) + +// Chi implements the χ distribution, a one parameter distribution +// with support on the positive numbers. +// +// The density function is given by +// +// 1/(2^{k/2-1} * Γ(k/2)) * x^{k - 1} * e^{-x^2/2} +// +// For more information, see https://en.wikipedia.org/wiki/Chi_distribution. +type Chi struct { + // K is the shape parameter, corresponding to the degrees of freedom. Must + // be greater than 0. + K float64 + + Src rand.Source +} + +// CDF computes the value of the cumulative density function at x. +func (c Chi) CDF(x float64) float64 { + return mathext.GammaIncReg(c.K/2, (x*x)/2) +} + +// Entropy returns the differential entropy of the distribution. +func (c Chi) Entropy() float64 { + lg, _ := math.Lgamma(c.K / 2) + return lg + 0.5*(c.K-math.Ln2-(c.K-1)*mathext.Digamma(c.K/2)) +} + +// ExKurtosis returns the excess kurtosis of the distribution. 
+func (c Chi) ExKurtosis() float64 { + v := c.Variance() + s := math.Sqrt(v) + return 2 / v * (1 - c.Mean()*s*c.Skewness() - v) +} + +// LogProb computes the natural logarithm of the value of the probability +// density function at x. +func (c Chi) LogProb(x float64) float64 { + if x < 0 { + return math.Inf(-1) + } + lg, _ := math.Lgamma(c.K / 2) + return (c.K-1)*math.Log(x) - (x*x)/2 - (c.K/2-1)*math.Ln2 - lg +} + +// Mean returns the mean of the probability distribution. +func (c Chi) Mean() float64 { + lg1, _ := math.Lgamma((c.K + 1) / 2) + lg, _ := math.Lgamma(c.K / 2) + return math.Sqrt2 * math.Exp(lg1-lg) +} + +// Median returns the median of the distribution. +func (c Chi) Median() float64 { + return c.Quantile(0.5) +} + +// Mode returns the mode of the distribution. +// +// Mode returns NaN if K is less than one. +func (c Chi) Mode() float64 { + return math.Sqrt(c.K - 1) +} + +// NumParameters returns the number of parameters in the distribution. +func (c Chi) NumParameters() int { + return 1 +} + +// Prob computes the value of the probability density function at x. +func (c Chi) Prob(x float64) float64 { + return math.Exp(c.LogProb(x)) +} + +// Rand returns a random sample drawn from the distribution. +func (c Chi) Rand() float64 { + return math.Sqrt(Gamma{c.K / 2, 0.5, c.Src}.Rand()) +} + +// Quantile returns the inverse of the cumulative distribution function. +func (c Chi) Quantile(p float64) float64 { + if p < 0 || 1 < p { + panic(badPercentile) + } + return math.Sqrt(2 * mathext.GammaIncRegInv(0.5*c.K, p)) +} + +// Skewness returns the skewness of the distribution. +func (c Chi) Skewness() float64 { + v := c.Variance() + s := math.Sqrt(v) + return c.Mean() / (s * v) * (1 - 2*v) +} + +// StdDev returns the standard deviation of the probability distribution. +func (c Chi) StdDev() float64 { + return math.Sqrt(c.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (c Chi) Survival(x float64) float64 { + if x < 0 { + return 1 + } + return mathext.GammaIncRegComp(0.5*c.K, 0.5*(x*x)) +} + +// Variance returns the variance of the probability distribution. +func (c Chi) Variance() float64 { + m := c.Mean() + return math.Max(0, c.K-m*m) +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/chisquared.go b/vendor/gonum.org/v1/gonum/stat/distuv/chisquared.go new file mode 100644 index 0000000000..1190803ae7 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/chisquared.go @@ -0,0 +1,101 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" + + "gonum.org/v1/gonum/mathext" +) + +// ChiSquared implements the χ² distribution, a one parameter distribution +// with support on the positive numbers. +// +// The density function is given by +// +// 1/(2^{k/2} * Γ(k/2)) * x^{k/2 - 1} * e^{-x/2} +// +// It is a special case of the Gamma distribution, Γ(k/2, 1/2). +// +// For more information, see https://en.wikipedia.org/wiki/Chi-squared_distribution. +type ChiSquared struct { + // K is the shape parameter, corresponding to the degrees of freedom. Must + // be greater than 0. + K float64 + + Src rand.Source +} + +// CDF computes the value of the cumulative density function at x. +func (c ChiSquared) CDF(x float64) float64 { + return mathext.GammaIncReg(c.K/2, x/2) +} + +// ExKurtosis returns the excess kurtosis of the distribution. 
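+// For K degrees of freedom the excess kurtosis is 12/K.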
+func (c ChiSquared) ExKurtosis() float64 { + return 12 / c.K +} + +// LogProb computes the natural logarithm of the value of the probability +// density function at x. +func (c ChiSquared) LogProb(x float64) float64 { + if x < 0 { + return math.Inf(-1) + } + lg, _ := math.Lgamma(c.K / 2) + return (c.K/2-1)*math.Log(x) - x/2 - (c.K/2)*math.Ln2 - lg +} + +// Mean returns the mean of the probability distribution. +func (c ChiSquared) Mean() float64 { + return c.K +} + +// Mode returns the mode of the distribution. +func (c ChiSquared) Mode() float64 { + return math.Max(c.K-2, 0) +} + +// NumParameters returns the number of parameters in the distribution. +func (c ChiSquared) NumParameters() int { + return 1 +} + +// Prob computes the value of the probability density function at x. +func (c ChiSquared) Prob(x float64) float64 { + return math.Exp(c.LogProb(x)) +} + +// Rand returns a random sample drawn from the distribution. +func (c ChiSquared) Rand() float64 { + return Gamma{c.K / 2, 0.5, c.Src}.Rand() +} + +// Quantile returns the inverse of the cumulative distribution function. +func (c ChiSquared) Quantile(p float64) float64 { + if p < 0 || p > 1 { + panic(badPercentile) + } + return mathext.GammaIncRegInv(0.5*c.K, p) * 2 +} + +// StdDev returns the standard deviation of the probability distribution. +func (c ChiSquared) StdDev() float64 { + return math.Sqrt(c.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (c ChiSquared) Survival(x float64) float64 { + if x < 0 { + return 1 + } + return mathext.GammaIncRegComp(0.5*c.K, 0.5*x) +} + +// Variance returns the variance of the probability distribution. +func (c ChiSquared) Variance() float64 { + return 2 * c.K +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/constants.go b/vendor/gonum.org/v1/gonum/stat/distuv/constants.go new file mode 100644 index 0000000000..3ebe635047 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/constants.go @@ -0,0 +1,28 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +const ( + // oneOverRoot2Pi is the value of 1/(2Pi)^(1/2) + // http://www.wolframalpha.com/input/?i=1%2F%282+*+pi%29%5E%281%2F2%29 + oneOverRoot2Pi = 0.39894228040143267793994605993438186847585863116493465766592582967065792589930183850125233390730693643030255886263518268 + + //LogRoot2Pi is the value of log(sqrt(2*Pi)) + logRoot2Pi = 0.91893853320467274178032973640561763986139747363778341281715154048276569592726039769474329863595419762200564662463433744 + negLogRoot2Pi = -logRoot2Pi + log2Pi = 1.8378770664093454835606594728112352797227949472755668 + ln2 = 0.69314718055994530941723212145817656807550013436025525412068000949339362196969471560586332699641868754200148102057068573368552023 + + // Euler–Mascheroni constant. 
+ eulerGamma = 0.5772156649015328606065120900824024310421593359399235988057672348848677267776646709369470632917467495146314472498070824809605 + + // sqrt3 is the value of sqrt(3) + // https://www.wolframalpha.com/input/?i=sqrt%283%29 + sqrt3 = 1.7320508075688772935274463415058723669428052538103806280558069794519330169088000370811461867572485756756261414154067030299699450 +) + +const ( + panicNameMismatch = "parameter name mismatch" +) diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/doc.go b/vendor/gonum.org/v1/gonum/stat/distuv/doc.go new file mode 100644 index 0000000000..68aba2d064 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package distuv provides univariate random distribution types. +package distuv // import "gonum.org/v1/gonum/stat/distuv" diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/exponential.go b/vendor/gonum.org/v1/gonum/stat/distuv/exponential.go new file mode 100644 index 0000000000..3acadb437c --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/exponential.go @@ -0,0 +1,266 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/stat" +) + +// Exponential represents the exponential distribution (https://en.wikipedia.org/wiki/Exponential_distribution). +type Exponential struct { + Rate float64 + Src rand.Source +} + +// CDF computes the value of the cumulative density function at x. +func (e Exponential) CDF(x float64) float64 { + if x < 0 { + return 0 + } + return -math.Expm1(-e.Rate * x) +} + +// ConjugateUpdate updates the parameters of the distribution from the sufficient +// statistics of a set of samples. The sufficient statistics, suffStat, have been +// observed with nSamples observations. The prior values of the distribution are those +// currently in the distribution, and have been observed with priorStrength samples. +// +// For the exponential distribution, the sufficient statistic is the inverse of +// the mean of the samples. +// The prior is having seen priorStrength[0] samples with inverse mean Exponential.Rate +// As a result of this function, Exponential.Rate is updated based on the weighted +// samples, and priorStrength is modified to include the new number of samples observed. +// +// This function panics if len(suffStat) != e.NumSuffStat() or +// len(priorStrength) != e.NumSuffStat(). +func (e *Exponential) ConjugateUpdate(suffStat []float64, nSamples float64, priorStrength []float64) { + if len(suffStat) != e.NumSuffStat() { + panic("exponential: incorrect suffStat length") + } + if len(priorStrength) != e.NumSuffStat() { + panic("exponential: incorrect priorStrength length") + } + + totalSamples := nSamples + priorStrength[0] + + totalSum := nSamples / suffStat[0] + if !(priorStrength[0] == 0) { + totalSum += priorStrength[0] / e.Rate + } + e.Rate = totalSamples / totalSum + priorStrength[0] = totalSamples +} + +// Entropy returns the entropy of the distribution. +func (e Exponential) Entropy() float64 { + return 1 - math.Log(e.Rate) +} + +// ExKurtosis returns the excess kurtosis of the distribution. 
+func (Exponential) ExKurtosis() float64 { + return 6 +} + +// Fit sets the parameters of the probability distribution from the +// data samples x with relative weights w. +// If weights is nil, then all the weights are 1. +// If weights is not nil, then the len(weights) must equal len(samples). +func (e *Exponential) Fit(samples, weights []float64) { + suffStat := make([]float64, e.NumSuffStat()) + nSamples := e.SuffStat(suffStat, samples, weights) + e.ConjugateUpdate(suffStat, nSamples, make([]float64, e.NumSuffStat())) +} + +// LogProb computes the natural logarithm of the value of the probability density function at x. +func (e Exponential) LogProb(x float64) float64 { + if x < 0 { + return math.Inf(-1) + } + return math.Log(e.Rate) - e.Rate*x +} + +// Mean returns the mean of the probability distribution. +func (e Exponential) Mean() float64 { + return 1 / e.Rate +} + +// Median returns the median of the probability distribution. +func (e Exponential) Median() float64 { + return math.Ln2 / e.Rate +} + +// Mode returns the mode of the probability distribution. +func (Exponential) Mode() float64 { + return 0 +} + +// NumParameters returns the number of parameters in the distribution. +func (Exponential) NumParameters() int { + return 1 +} + +// NumSuffStat returns the number of sufficient statistics for the distribution. +func (Exponential) NumSuffStat() int { + return 1 +} + +// Prob computes the value of the probability density function at x. +func (e Exponential) Prob(x float64) float64 { + return math.Exp(e.LogProb(x)) +} + +// Quantile returns the inverse of the cumulative probability distribution. +func (e Exponential) Quantile(p float64) float64 { + if p < 0 || p > 1 { + panic(badPercentile) + } + return -math.Log(1-p) / e.Rate +} + +// Rand returns a random sample drawn from the distribution. +func (e Exponential) Rand() float64 { + var rnd float64 + if e.Src == nil { + rnd = rand.ExpFloat64() + } else { + rnd = rand.New(e.Src).ExpFloat64() + } + return rnd / e.Rate +} + +// Score returns the score function with respect to the parameters of the +// distribution at the input location x. The score function is the derivative +// of the log-likelihood at x with respect to the parameters +// +// (∂/∂θ) log(p(x;θ)) +// +// If deriv is non-nil, len(deriv) must equal the number of parameters otherwise +// Score will panic, and the derivative is stored in-place into deriv. If deriv +// is nil a new slice will be allocated and returned. +// +// The order is [∂LogProb / ∂Rate]. +// +// For more information, see https://en.wikipedia.org/wiki/Score_%28statistics%29. +// +// Special cases: +// +// Score(0) = [NaN] +func (e Exponential) Score(deriv []float64, x float64) []float64 { + if deriv == nil { + deriv = make([]float64, e.NumParameters()) + } + if len(deriv) != e.NumParameters() { + panic(badLength) + } + if x > 0 { + deriv[0] = 1/e.Rate - x + return deriv + } + if x < 0 { + deriv[0] = 0 + return deriv + } + deriv[0] = math.NaN() + return deriv +} + +// ScoreInput returns the score function with respect to the input of the +// distribution at the input location specified by x. The score function is the +// derivative of the log-likelihood +// +// (d/dx) log(p(x)) . +// +// Special cases: +// +// ScoreInput(0) = NaN +func (e Exponential) ScoreInput(x float64) float64 { + if x > 0 { + return -e.Rate + } + if x < 0 { + return 0 + } + return math.NaN() +} + +// Skewness returns the skewness of the distribution. 
+func (Exponential) Skewness() float64 {
+	return 2
+}
+
+// StdDev returns the standard deviation of the probability distribution.
+func (e Exponential) StdDev() float64 {
+	return 1 / e.Rate
+}
+
+// SuffStat computes the sufficient statistics of a set of samples to update
+// the distribution. The sufficient statistics are stored in place, and the
+// effective number of samples are returned.
+//
+// The exponential distribution has one sufficient statistic, the average rate
+// of the samples.
+//
+// If weights is nil, the weights are assumed to be 1, otherwise panics if
+// len(samples) != len(weights). Panics if len(suffStat) != NumSuffStat().
+func (Exponential) SuffStat(suffStat, samples, weights []float64) (nSamples float64) {
+	if len(weights) != 0 && len(samples) != len(weights) {
+		panic(badLength)
+	}
+
+	if len(suffStat) != (Exponential{}).NumSuffStat() {
+		panic(badSuffStat)
+	}
+
+	if len(weights) == 0 {
+		nSamples = float64(len(samples))
+	} else {
+		nSamples = floats.Sum(weights)
+	}
+
+	mean := stat.Mean(samples, weights)
+	suffStat[0] = 1 / mean
+	return nSamples
+}
+
+// Survival returns the survival function (complementary CDF) at x.
+func (e Exponential) Survival(x float64) float64 {
+	if x < 0 {
+		return 1
+	}
+	return math.Exp(-e.Rate * x)
+}
+
+// setParameters modifies the parameters of the distribution.
+func (e *Exponential) setParameters(p []Parameter) {
+	if len(p) != e.NumParameters() {
+		panic("exponential: incorrect number of parameters to set")
+	}
+	if p[0].Name != "Rate" {
+		panic("exponential: " + panicNameMismatch)
+	}
+	e.Rate = p[0].Value
+}
+
+// Variance returns the variance of the probability distribution.
+func (e Exponential) Variance() float64 {
+	return 1 / (e.Rate * e.Rate)
+}
+
+// parameters returns the parameters of the distribution.
+func (e Exponential) parameters(p []Parameter) []Parameter {
+	nParam := e.NumParameters()
+	if p == nil {
+		p = make([]Parameter, nParam)
+	} else if len(p) != nParam {
+		panic("exponential: improper parameter length")
+	}
+	p[0].Name = "Rate"
+	p[0].Value = e.Rate
+	return p
+}
diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/f.go b/vendor/gonum.org/v1/gonum/stat/distuv/f.go
new file mode 100644
index 0000000000..299fce4fc3
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/distuv/f.go
@@ -0,0 +1,134 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distuv
+
+import (
+	"math"
+	"math/rand/v2"
+
+	"gonum.org/v1/gonum/mathext"
+)
+
+// F implements the F-distribution, a two-parameter continuous distribution
+// with support over the positive real numbers.
+//
+// The F-distribution has density function
+//
+//	sqrt(((d1*x)^d1) * d2^d2 / ((d1*x+d2)^(d1+d2))) / (x * B(d1/2,d2/2))
+//
+// where B is the beta function.
+//
+// For more information, see https://en.wikipedia.org/wiki/F-distribution.
+type F struct {
+	D1  float64 // Degrees of freedom for the numerator
+	D2  float64 // Degrees of freedom for the denominator
+	Src rand.Source
+}
+
+// CDF computes the value of the cumulative density function at x.
+func (f F) CDF(x float64) float64 {
+	return mathext.RegIncBeta(f.D1/2, f.D2/2, f.D1*x/(f.D1*x+f.D2))
+}
+
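A small sketch of the maximum-likelihood path through the Exponential's SuffStat and ConjugateUpdate above: Fit with an all-zero prior reduces to setting the rate to the inverse sample mean.

	package main

	import (
		"fmt"

		"gonum.org/v1/gonum/stat/distuv"
	)

	func main() {
		// Fit computes suffStat[0] = 1/mean(samples) via SuffStat and
		// applies it with an all-zero prior through ConjugateUpdate.
		e := distuv.Exponential{Rate: 1}
		e.Fit([]float64{0.1, 0.4, 0.2, 0.8, 0.3}, nil)
		fmt.Println(e.Rate) // the mean is 0.36, so this prints ≈ 2.78
	}

+// ExKurtosis returns the excess kurtosis of the distribution.
+//
+// ExKurtosis returns NaN if the D2 parameter is less than or equal to 8.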
+func (f F) ExKurtosis() float64 { + if f.D2 <= 8 { + return math.NaN() + } + return (12 / (f.D2 - 6)) * ((5*f.D2-22)/(f.D2-8) + ((f.D2-4)/f.D1)*((f.D2-2)/(f.D2-8))*((f.D2-2)/(f.D1+f.D2-2))) +} + +// LogProb computes the natural logarithm of the value of the probability +// density function at x. +func (f F) LogProb(x float64) float64 { + return 0.5*(f.D1*math.Log(f.D1*x)+f.D2*math.Log(f.D2)-(f.D1+f.D2)*math.Log(f.D1*x+f.D2)) - math.Log(x) - mathext.Lbeta(f.D1/2, f.D2/2) +} + +// Mean returns the mean of the probability distribution. +// +// Mean returns NaN if the D2 parameter is less than or equal to 2. +func (f F) Mean() float64 { + if f.D2 <= 2 { + return math.NaN() + } + return f.D2 / (f.D2 - 2) +} + +// Mode returns the mode of the distribution. +// +// Mode returns NaN if the D1 parameter is less than or equal to 2. +func (f F) Mode() float64 { + if f.D1 <= 2 { + return math.NaN() + } + return ((f.D1 - 2) / f.D1) * (f.D2 / (f.D2 + 2)) +} + +// NumParameters returns the number of parameters in the distribution. +func (f F) NumParameters() int { + return 2 +} + +// Prob computes the value of the probability density function at x. +func (f F) Prob(x float64) float64 { + return math.Exp(f.LogProb(x)) +} + +// Quantile returns the inverse of the cumulative distribution function. +func (f F) Quantile(p float64) float64 { + if p < 0 || p > 1 { + panic(badPercentile) + } + y := mathext.InvRegIncBeta(0.5*f.D1, 0.5*f.D2, p) + return f.D2 * y / (f.D1 * (1 - y)) +} + +// Rand returns a random sample drawn from the distribution. +func (f F) Rand() float64 { + u1 := ChiSquared{f.D1, f.Src}.Rand() + u2 := ChiSquared{f.D2, f.Src}.Rand() + return (u1 / f.D1) / (u2 / f.D2) +} + +// Skewness returns the skewness of the distribution. +// +// Skewness returns NaN if the D2 parameter is less than or equal to 6. +func (f F) Skewness() float64 { + if f.D2 <= 6 { + return math.NaN() + } + num := (2*f.D1 + f.D2 - 2) * math.Sqrt(8*(f.D2-4)) + den := (f.D2 - 6) * math.Sqrt(f.D1*(f.D1+f.D2-2)) + return num / den +} + +// StdDev returns the standard deviation of the probability distribution. +// +// StdDev returns NaN if the D2 parameter is less than or equal to 4. +func (f F) StdDev() float64 { + if f.D2 <= 4 { + return math.NaN() + } + return math.Sqrt(f.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (f F) Survival(x float64) float64 { + return 1 - f.CDF(x) +} + +// Variance returns the variance of the probability distribution. +// +// Variance returns NaN if the D2 parameter is less than or equal to 4. +func (f F) Variance() float64 { + if f.D2 <= 4 { + return math.NaN() + } + num := 2 * f.D2 * f.D2 * (f.D1 + f.D2 - 2) + den := f.D1 * (f.D2 - 2) * (f.D2 - 2) * (f.D2 - 4) + return num / den +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/gamma.go b/vendor/gonum.org/v1/gonum/stat/distuv/gamma.go new file mode 100644 index 0000000000..739574f5c6 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/gamma.go @@ -0,0 +1,203 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" + + "gonum.org/v1/gonum/mathext" +) + +// Gamma implements the Gamma distribution, a two-parameter continuous distribution +// with support over the positive real numbers. 
+//
+// The gamma distribution has density function
+//
+//	β^α / Γ(α) x^(α-1)e^(-βx)
+//
+// For more information, see https://en.wikipedia.org/wiki/Gamma_distribution.
+type Gamma struct {
+	// Alpha is the shape parameter of the distribution. Alpha must be greater
+	// than 0. If Alpha == 1, this is equivalent to an exponential distribution.
+	Alpha float64
+	// Beta is the rate parameter of the distribution. Beta must be greater than 0.
+	// If Beta == 0.5, this is equivalent to a Chi-Squared distribution with
+	// 2*Alpha degrees of freedom.
+	Beta float64
+
+	Src rand.Source
+}
+
+// CDF computes the value of the cumulative distribution function at x.
+func (g Gamma) CDF(x float64) float64 {
+	if x < 0 {
+		return 0
+	}
+	return mathext.GammaIncReg(g.Alpha, g.Beta*x)
+}
+
+// ExKurtosis returns the excess kurtosis of the distribution.
+func (g Gamma) ExKurtosis() float64 {
+	return 6 / g.Alpha
+}
+
+// LogProb computes the natural logarithm of the value of the probability
+// density function at x.
+func (g Gamma) LogProb(x float64) float64 {
+	if x < 0 {
+		return math.Inf(-1)
+	}
+	a := g.Alpha
+	b := g.Beta
+	lg, _ := math.Lgamma(a)
+	if a == 1 {
+		return math.Log(b) - lg - b*x
+	}
+	return a*math.Log(b) - lg + (a-1)*math.Log(x) - b*x
+}
+
+// Mean returns the mean of the probability distribution.
+func (g Gamma) Mean() float64 {
+	return g.Alpha / g.Beta
+}
+
+// Mode returns the mode of the gamma distribution.
+//
+// The mode is 0 in the special case where the Alpha (shape) parameter
+// is less than 1.
+func (g Gamma) Mode() float64 {
+	if g.Alpha < 1 {
+		return 0
+	}
+	return (g.Alpha - 1) / g.Beta
+}
+
+// NumParameters returns the number of parameters in the distribution.
+func (Gamma) NumParameters() int {
+	return 2
+}
+
+// Prob computes the value of the probability density function at x.
+func (g Gamma) Prob(x float64) float64 {
+	return math.Exp(g.LogProb(x))
+}
+
+// Quantile returns the inverse of the cumulative distribution function.
+func (g Gamma) Quantile(p float64) float64 {
+	if p < 0 || p > 1 {
+		panic(badPercentile)
+	}
+	return mathext.GammaIncRegInv(g.Alpha, p) / g.Beta
+}
+
+// Rand returns a random sample drawn from the distribution.
+//
+// Rand panics if either alpha or beta is <= 0.
+func (g Gamma) Rand() float64 {
+	const (
+		// The 0.2 threshold is from https://www4.stat.ncsu.edu/~rmartin/Codes/rgamss.R
+		// described in detail in https://arxiv.org/abs/1302.1884.
+		smallAlphaThresh = 0.2
+	)
+	if g.Beta <= 0 {
+		panic("gamma: beta <= 0")
+	}
+
+	unifrnd := rand.Float64
+	exprnd := rand.ExpFloat64
+	normrnd := rand.NormFloat64
+	if g.Src != nil {
+		rnd := rand.New(g.Src)
+		unifrnd = rnd.Float64
+		exprnd = rnd.ExpFloat64
+		normrnd = rnd.NormFloat64
+	}
+
+	a := g.Alpha
+	b := g.Beta
+	switch {
+	case a <= 0:
+		panic("gamma: alpha <= 0")
+	case a == 1:
+		// Generate from exponential
+		return exprnd() / b
+	case a < smallAlphaThresh:
+		// Generate using
+		// Liu, Chuanhai, Martin, Ryan and Syring, Nick. "Simulating from a
+		// gamma distribution with small shape parameter"
+		// https://arxiv.org/abs/1302.1884
+		// published version: http://link.springer.com/article/10.1007/s00180-016-0692-0
+
+		// Algorithm adjusted to work in log space as much as possible.
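+		// A rough reading of the loop below, per the references above:
+		// z is drawn from a two-piece proposal (a shifted Exp(1) draw
+		// for z >= 0, a negative exponential with rate lambda for z < 0);
+		// lh is the log of the target density of z up to a constant and
+		// lEta the log of the proposal piece, so the draw is accepted
+		// when lh-lEta beats -Exp(1), i.e. with probability exp(lh-lEta).
+		// The accepted z maps back to the gamma variate via exp(-z/a)/b.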
+ lambda := 1/a - 1 + lr := -math.Log1p(1 / lambda / math.E) + for { + e := exprnd() + var z float64 + if e >= -lr { + z = e + lr + } else { + z = -exprnd() / lambda + } + eza := math.Exp(-z / a) + lh := -z - eza + var lEta float64 + if z >= 0 { + lEta = -z + } else { + lEta = -1 + lambda*z + } + if lh-lEta > -exprnd() { + return eza / b + } + } + case a >= smallAlphaThresh: + // Generate using: + // Marsaglia, George, and Wai Wan Tsang. "A simple method for generating + // gamma variables." ACM Transactions on Mathematical Software (TOMS) + // 26.3 (2000): 363-372. + d := a - 1.0/3 + m := 1.0 + if a < 1 { + d += 1.0 + m = math.Pow(unifrnd(), 1/a) + } + c := 1 / (3 * math.Sqrt(d)) + for { + x := normrnd() + v := 1 + x*c + if v <= 0.0 { + continue + } + v = v * v * v + u := unifrnd() + if u < 1.0-0.0331*(x*x)*(x*x) { + return m * d * v / b + } + if math.Log(u) < 0.5*x*x+d*(1-v+math.Log(v)) { + return m * d * v / b + } + } + } + panic("unreachable") +} + +// Survival returns the survival function (complementary CDF) at x. +func (g Gamma) Survival(x float64) float64 { + if x < 0 { + return 1 + } + return mathext.GammaIncRegComp(g.Alpha, g.Beta*x) +} + +// StdDev returns the standard deviation of the probability distribution. +func (g Gamma) StdDev() float64 { + return math.Sqrt(g.Alpha) / g.Beta +} + +// Variance returns the variance of the probability distribution. +func (g Gamma) Variance() float64 { + return g.Alpha / g.Beta / g.Beta +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/general.go b/vendor/gonum.org/v1/gonum/stat/distuv/general.go new file mode 100644 index 0000000000..5b78991943 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/general.go @@ -0,0 +1,24 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +// Parameter represents a parameter of a probability distribution +type Parameter struct { + Name string + Value float64 +} + +const ( + badPercentile = "distuv: percentile out of bounds" + badLength = "distuv: slice length mismatch" + badSuffStat = "distuv: wrong suffStat length" + errNoSamples = "distuv: must have at least one sample" +) + +const ( + expNegOneHalf = 0.6065306597126334236037995349911804534419 // https://oeis.org/A092605 + eulerMascheroni = 0.5772156649015328606065120900824024310421 // https://oeis.org/A001620 + apery = 1.2020569031595942853997381615114499907649 // https://oeis.org/A002117 +) diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/gumbel.go b/vendor/gonum.org/v1/gonum/stat/distuv/gumbel.go new file mode 100644 index 0000000000..7017dd4e1b --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/gumbel.go @@ -0,0 +1,118 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" +) + +// GumbelRight implements the right-skewed Gumbel distribution, a two-parameter +// continuous distribution with support over the real numbers. The right-skewed +// Gumbel distribution is also sometimes known as the Extreme Value distribution. +// +// The right-skewed Gumbel distribution has density function +// +// 1/beta * exp(-(z + exp(-z))) +// z = (x - mu)/beta +// +// Beta must be greater than 0. +// +// For more information, see https://en.wikipedia.org/wiki/Gumbel_distribution. 
+type GumbelRight struct {
+	Mu   float64
+	Beta float64
+	Src  rand.Source
+}
+
+func (g GumbelRight) z(x float64) float64 {
+	return (x - g.Mu) / g.Beta
+}
+
+// CDF computes the value of the cumulative density function at x.
+func (g GumbelRight) CDF(x float64) float64 {
+	z := g.z(x)
+	return math.Exp(-math.Exp(-z))
+}
+
+// Entropy returns the differential entropy of the distribution.
+func (g GumbelRight) Entropy() float64 {
+	return math.Log(g.Beta) + eulerMascheroni + 1
+}
+
+// ExKurtosis returns the excess kurtosis of the distribution.
+func (g GumbelRight) ExKurtosis() float64 {
+	return 12.0 / 5
+}
+
+// LogProb computes the natural logarithm of the value of the probability density function at x.
+func (g GumbelRight) LogProb(x float64) float64 {
+	z := g.z(x)
+	return -math.Log(g.Beta) - z - math.Exp(-z)
+}
+
+// Mean returns the mean of the probability distribution.
+func (g GumbelRight) Mean() float64 {
+	return g.Mu + g.Beta*eulerMascheroni
+}
+
+// Median returns the median of the Gumbel distribution.
+func (g GumbelRight) Median() float64 {
+	return g.Mu - g.Beta*math.Log(math.Ln2)
+}
+
+// Mode returns the mode of the Gumbel distribution.
+func (g GumbelRight) Mode() float64 {
+	return g.Mu
+}
+
+// NumParameters returns the number of parameters in the distribution.
+func (GumbelRight) NumParameters() int {
+	return 2
+}
+
+// Prob computes the value of the probability density function at x.
+func (g GumbelRight) Prob(x float64) float64 {
+	return math.Exp(g.LogProb(x))
+}
+
+// Quantile returns the inverse of the cumulative probability distribution.
+func (g GumbelRight) Quantile(p float64) float64 {
+	if p < 0 || 1 < p {
+		panic(badPercentile)
+	}
+	return g.Mu - g.Beta*math.Log(-math.Log(p))
+}
+
+// Rand returns a random sample drawn from the distribution.
+func (g GumbelRight) Rand() float64 {
+	var rnd float64
+	if g.Src == nil {
+		rnd = rand.ExpFloat64()
+	} else {
+		rnd = rand.New(g.Src).ExpFloat64()
+	}
+	return g.Mu - g.Beta*math.Log(rnd)
+}
+
+// Skewness returns the skewness of the distribution.
+func (GumbelRight) Skewness() float64 {
+	return 12 * math.Sqrt(6) * apery / (math.Pi * math.Pi * math.Pi)
+}
+
+// StdDev returns the standard deviation of the probability distribution.
+func (g GumbelRight) StdDev() float64 {
+	return (math.Pi / math.Sqrt(6)) * g.Beta
+}
+
+// Survival returns the survival function (complementary CDF) at x.
+func (g GumbelRight) Survival(x float64) float64 {
+	return 1 - g.CDF(x)
+}
+
+// Variance returns the variance of the probability distribution.
+func (g GumbelRight) Variance() float64 {
+	return math.Pi * math.Pi * g.Beta * g.Beta / 6
+}
diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/interfaces.go b/vendor/gonum.org/v1/gonum/stat/distuv/interfaces.go
new file mode 100644
index 0000000000..a3cc94bb27
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/distuv/interfaces.go
@@ -0,0 +1,32 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distuv
+
+// LogProber wraps the LogProb method.
+type LogProber interface {
+	// LogProb returns the natural logarithm of the
+	// value of the probability density or probability
+	// mass function at x.
+	LogProb(x float64) float64
+}
+
+// Rander wraps the Rand method.
+type Rander interface {
+	// Rand returns a random sample drawn from the distribution.
+	Rand() float64
+}
+
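A quick sketch of how these small interfaces decouple callers from concrete distributions, using the GumbelRight type above; the quantile/CDF round trip doubles as a sanity check:

	package main

	import (
		"fmt"

		"gonum.org/v1/gonum/stat/distuv"
	)

	func main() {
		g := distuv.GumbelRight{Mu: 2, Beta: 1.5}

		// Any distribution in the package can be passed around behind
		// the Rander/LogProber interfaces.
		var r distuv.Rander = g
		fmt.Println(r.Rand())

		// The closed-form quantile inverts the CDF exactly.
		fmt.Println(g.Quantile(g.CDF(3.7))) // ≈ 3.7
	}

+// RandLogProber is the interface that groups the Rander and LogProber methods.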
+type RandLogProber interface { + Rander + LogProber +} + +// Quantiler wraps the Quantile method. +type Quantiler interface { + // Quantile returns the minimum value of x from amongst + // all those values whose CDF value exceeds or equals p. + Quantile(p float64) float64 +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/inversegamma.go b/vendor/gonum.org/v1/gonum/stat/distuv/inversegamma.go new file mode 100644 index 0000000000..44fe5e6ce9 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/inversegamma.go @@ -0,0 +1,123 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" + + "gonum.org/v1/gonum/mathext" +) + +// InverseGamma implements the inverse gamma distribution, a two-parameter +// continuous distribution with support over the positive real numbers. The +// inverse gamma distribution is the same as the distribution of the reciprocal +// of a gamma distributed random variable. +// +// The inverse gamma distribution has density function +// +// β^α / Γ(α) x^(-α-1)e^(-β/x) +// +// For more information, see https://en.wikipedia.org/wiki/Inverse-gamma_distribution +type InverseGamma struct { + // Alpha is the shape parameter of the distribution. Alpha must be greater than 0. + Alpha float64 + // Beta is the scale parameter of the distribution. Beta must be greater than 0. + Beta float64 + + Src rand.Source +} + +// CDF computes the value of the cumulative distribution function at x. +func (g InverseGamma) CDF(x float64) float64 { + if x < 0 { + return 0 + } + // TODO(btracey): Replace this with a direct call to the upper regularized + // gamma function if mathext gets it. + //return 1 - mathext.GammaInc(g.Alpha, g.Beta/x) + return mathext.GammaIncRegComp(g.Alpha, g.Beta/x) +} + +// ExKurtosis returns the excess kurtosis of the distribution. +func (g InverseGamma) ExKurtosis() float64 { + if g.Alpha <= 4 { + return math.Inf(1) + } + return (30*g.Alpha - 66) / (g.Alpha - 3) / (g.Alpha - 4) +} + +// LogProb computes the natural logarithm of the value of the probability +// density function at x. +func (g InverseGamma) LogProb(x float64) float64 { + if x <= 0 { + return math.Inf(-1) + } + a := g.Alpha + b := g.Beta + lg, _ := math.Lgamma(a) + return a*math.Log(b) - lg + (-a-1)*math.Log(x) - b/x +} + +// Mean returns the mean of the probability distribution. +func (g InverseGamma) Mean() float64 { + if g.Alpha <= 1 { + return math.Inf(1) + } + return g.Beta / (g.Alpha - 1) +} + +// Mode returns the mode of the distribution. +func (g InverseGamma) Mode() float64 { + return g.Beta / (g.Alpha + 1) +} + +// NumParameters returns the number of parameters in the distribution. +func (InverseGamma) NumParameters() int { + return 2 +} + +// Prob computes the value of the probability density function at x. +func (g InverseGamma) Prob(x float64) float64 { + return math.Exp(g.LogProb(x)) +} + +// Quantile returns the inverse of the cumulative distribution function. +func (g InverseGamma) Quantile(p float64) float64 { + if p < 0 || 1 < p { + panic(badPercentile) + } + return (1 / (mathext.GammaIncRegCompInv(g.Alpha, p))) * g.Beta +} + +// Rand returns a random sample drawn from the distribution. +// +// Rand panics if either alpha or beta is <= 0. +func (g InverseGamma) Rand() float64 { + // TODO(btracey): See if there is a more direct way to sample. 
+ return 1 / Gamma(g).Rand() +} + +// Survival returns the survival function (complementary CDF) at x. +func (g InverseGamma) Survival(x float64) float64 { + if x < 0 { + return 1 + } + return mathext.GammaIncReg(g.Alpha, g.Beta/x) +} + +// StdDev returns the standard deviation of the probability distribution. +func (g InverseGamma) StdDev() float64 { + return math.Sqrt(g.Variance()) +} + +// Variance returns the variance of the probability distribution. +func (g InverseGamma) Variance() float64 { + if g.Alpha <= 2 { + return math.Inf(1) + } + v := g.Beta / (g.Alpha - 1) + return v * v / (g.Alpha - 2) +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/laplace.go b/vendor/gonum.org/v1/gonum/stat/distuv/laplace.go new file mode 100644 index 0000000000..36d965512e --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/laplace.go @@ -0,0 +1,267 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" + "sort" + + "gonum.org/v1/gonum/stat" +) + +// Laplace represents the Laplace distribution (https://en.wikipedia.org/wiki/Laplace_distribution). +type Laplace struct { + Mu float64 // Mean of the Laplace distribution + Scale float64 // Scale of the Laplace distribution + Src rand.Source +} + +// CDF computes the value of the cumulative density function at x. +func (l Laplace) CDF(x float64) float64 { + if x < l.Mu { + return 0.5 * math.Exp((x-l.Mu)/l.Scale) + } + return 1 - 0.5*math.Exp(-(x-l.Mu)/l.Scale) +} + +// Entropy returns the entropy of the distribution. +func (l Laplace) Entropy() float64 { + return 1 + math.Log(2*l.Scale) +} + +// ExKurtosis returns the excess kurtosis of the distribution. +func (l Laplace) ExKurtosis() float64 { + return 3 +} + +// Fit sets the parameters of the probability distribution from the +// data samples x with relative weights w. +// If weights is nil, then all the weights are 1. +// If weights is not nil, then the len(weights) must equal len(samples). +// +// Note: Laplace distribution has no FitPrior because it has no sufficient +// statistics. 
+func (l *Laplace) Fit(samples, weights []float64) {
+	if weights != nil && len(samples) != len(weights) {
+		panic(badLength)
+	}
+
+	if len(samples) == 0 {
+		panic(errNoSamples)
+	}
+	if len(samples) == 1 {
+		l.Mu = samples[0]
+		l.Scale = 0
+		return
+	}
+
+	var (
+		sortedSamples []float64
+		sortedWeights []float64
+	)
+	if sort.Float64sAreSorted(samples) {
+		sortedSamples = samples
+		sortedWeights = weights
+	} else {
+		// Need to copy the inputs so they aren't affected by the sorting.
+		sortedSamples = make([]float64, len(samples))
+		copy(sortedSamples, samples)
+		if weights == nil {
+			sort.Float64s(sortedSamples)
+		} else {
+			sortedWeights = make([]float64, len(samples))
+			copy(sortedWeights, weights)
+			stat.SortWeighted(sortedSamples, sortedWeights)
+		}
+	}
+
+	// The (weighted) median of the samples is the maximum likelihood estimate
+	// of the mean parameter.
+	// TODO: Rethink quantile type when stat has more options
+	l.Mu = stat.Quantile(0.5, stat.Empirical, sortedSamples, sortedWeights)
+
+	// The scale parameter is the average absolute distance
+	// between the samples and the mean.
+	var absError float64
+	var sumWeights float64
+	if weights != nil {
+		for i, v := range samples {
+			absError += weights[i] * math.Abs(l.Mu-v)
+			sumWeights += weights[i]
+		}
+		l.Scale = absError / sumWeights
+	} else {
+		for _, v := range samples {
+			absError += math.Abs(l.Mu - v)
+		}
+		l.Scale = absError / float64(len(samples))
+	}
+}
+
+// LogProb computes the natural logarithm of the value of the probability density
+// function at x.
+func (l Laplace) LogProb(x float64) float64 {
+	return -math.Ln2 - math.Log(l.Scale) - math.Abs(x-l.Mu)/l.Scale
+}
+
+// parameters returns the parameters of the distribution.
+func (l Laplace) parameters(p []Parameter) []Parameter {
+	nParam := l.NumParameters()
+	if p == nil {
+		p = make([]Parameter, nParam)
+	} else if len(p) != nParam {
+		panic(badLength)
+	}
+	p[0].Name = "Mu"
+	p[0].Value = l.Mu
+	p[1].Name = "Scale"
+	p[1].Value = l.Scale
+	return p
+}
+
+// Mean returns the mean of the probability distribution.
+func (l Laplace) Mean() float64 {
+	return l.Mu
+}
+
+// Median returns the median of the Laplace distribution.
+func (l Laplace) Median() float64 {
+	return l.Mu
+}
+
+// Mode returns the mode of the Laplace distribution.
+func (l Laplace) Mode() float64 {
+	return l.Mu
+}
+
+// NumParameters returns the number of parameters in the distribution.
+func (l Laplace) NumParameters() int {
+	return 2
+}
+
+// Quantile returns the inverse of the cumulative probability distribution.
+func (l Laplace) Quantile(p float64) float64 {
+	if p < 0 || p > 1 {
+		panic(badPercentile)
+	}
+	if p < 0.5 {
+		return l.Mu + l.Scale*math.Log(1+2*(p-0.5))
+	}
+	return l.Mu - l.Scale*math.Log(1-2*(p-0.5))
+}
+
+// Prob computes the value of the probability density function at x.
+func (l Laplace) Prob(x float64) float64 {
+	return math.Exp(l.LogProb(x))
+}
+
+// Rand returns a random sample drawn from the distribution.
+func (l Laplace) Rand() float64 {
+	var rnd float64
+	if l.Src == nil {
+		rnd = rand.Float64()
+	} else {
+		rnd = rand.New(l.Src).Float64()
+	}
+	u := rnd - 0.5
+	if u < 0 {
+		return l.Mu + l.Scale*math.Log(1+2*u)
+	}
+	return l.Mu - l.Scale*math.Log(1-2*u)
+}
+
+// Score returns the score function with respect to the parameters of the
+// distribution at the input location x.
The score function is the derivative +// of the log-likelihood at x with respect to the parameters +// +// (∂/∂θ) log(p(x;θ)) +// +// If deriv is non-nil, len(deriv) must equal the number of parameters otherwise +// Score will panic, and the derivative is stored in-place into deriv. If deriv +// is nil a new slice will be allocated and returned. +// +// The order is [∂LogProb / ∂Mu, ∂LogProb / ∂Scale]. +// +// For more information, see https://en.wikipedia.org/wiki/Score_%28statistics%29. +// +// Special cases: +// +// Score(l.Mu) = [NaN, -1/l.Scale] +func (l Laplace) Score(deriv []float64, x float64) []float64 { + if deriv == nil { + deriv = make([]float64, l.NumParameters()) + } + if len(deriv) != l.NumParameters() { + panic(badLength) + } + diff := x - l.Mu + if diff > 0 { + deriv[0] = 1 / l.Scale + } else if diff < 0 { + deriv[0] = -1 / l.Scale + } else { + // must be NaN + deriv[0] = math.NaN() + } + + deriv[1] = math.Abs(diff)/(l.Scale*l.Scale) - 1/l.Scale + return deriv +} + +// ScoreInput returns the score function with respect to the input of the +// distribution at the input location specified by x. The score function is the +// derivative of the log-likelihood +// +// (d/dx) log(p(x)) . +// +// Special cases: +// +// ScoreInput(l.Mu) = NaN +func (l Laplace) ScoreInput(x float64) float64 { + diff := x - l.Mu + if diff == 0 { + return math.NaN() + } + if diff > 0 { + return -1 / l.Scale + } + return 1 / l.Scale +} + +// Skewness returns the skewness of the distribution. +func (Laplace) Skewness() float64 { + return 0 +} + +// StdDev returns the standard deviation of the distribution. +func (l Laplace) StdDev() float64 { + return math.Sqrt2 * l.Scale +} + +// Survival returns the survival function (complementary CDF) at x. +func (l Laplace) Survival(x float64) float64 { + if x < l.Mu { + return 1 - 0.5*math.Exp((x-l.Mu)/l.Scale) + } + return 0.5 * math.Exp(-(x-l.Mu)/l.Scale) +} + +// setParameters modifies the parameters of the distribution. +func (l *Laplace) setParameters(p []Parameter) { + if len(p) != l.NumParameters() { + panic(badLength) + } + if p[0].Name != "Mu" { + panic("laplace: " + panicNameMismatch) + } + if p[1].Name != "Scale" { + panic("laplace: " + panicNameMismatch) + } + l.Mu = p[0].Value + l.Scale = p[1].Value +} + +// Variance returns the variance of the probability distribution. +func (l Laplace) Variance() float64 { + return 2 * l.Scale * l.Scale +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/logistic.go b/vendor/gonum.org/v1/gonum/stat/distuv/logistic.go new file mode 100644 index 0000000000..0392d6ccc1 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/logistic.go @@ -0,0 +1,98 @@ +// Copyright ©2021 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" +) + +// Logistic implements the Logistic distribution, a two-parameter distribution with support on the real axis. +// Its cumulative distribution function is the logistic function. +// +// General form of probability density function for Logistic distribution is +// +// E(x) / (s * (1 + E(x))^2) +// where E(x) = exp(-(x-μ)/s) +// +// For more information, see https://en.wikipedia.org/wiki/Logistic_distribution. +type Logistic struct { + Mu float64 // Mean value + S float64 // Scale parameter proportional to standard deviation +} + +// CDF computes the value of the cumulative density function at x. 
+func (l Logistic) CDF(x float64) float64 {
+	return 1 / (1 + math.Exp(-(x-l.Mu)/l.S))
+}
+
+// ExKurtosis returns the excess kurtosis of the distribution.
+func (l Logistic) ExKurtosis() float64 {
+	return 6.0 / 5.0
+}
+
+// LogProb computes the natural logarithm of the value of the probability
+// density function at x.
+func (l Logistic) LogProb(x float64) float64 {
+	z := (x - l.Mu) / l.S
+	return -z - math.Log(l.S) - 2*math.Log(1+math.Exp(-z))
+}
+
+// Mean returns the mean of the probability distribution.
+func (l Logistic) Mean() float64 {
+	return l.Mu
+}
+
+// Mode returns the mode of the distribution.
+//
+// It is the same as the Mean for the Logistic distribution.
+func (l Logistic) Mode() float64 {
+	return l.Mu
+}
+
+// Median returns the median of the distribution.
+//
+// It is the same as the Mean for the Logistic distribution.
+func (l Logistic) Median() float64 {
+	return l.Mu
+}
+
+// NumParameters returns the number of parameters in the distribution.
+//
+// Always returns 2.
+func (l Logistic) NumParameters() int {
+	return 2
+}
+
+// Prob computes the value of the probability density function at x.
+func (l Logistic) Prob(x float64) float64 {
+	E := math.Exp(-(x - l.Mu) / l.S)
+	return E / (l.S * math.Pow(1+E, 2))
+}
+
+// Quantile returns the inverse of the cumulative distribution function.
+func (l Logistic) Quantile(p float64) float64 {
+	return l.Mu + l.S*math.Log(p/(1-p))
+}
+
+// Skewness returns the skewness of the distribution.
+//
+// Always 0 for Logistic distribution.
+func (l Logistic) Skewness() float64 {
+	return 0
+}
+
+// StdDev returns the standard deviation of the probability distribution.
+func (l Logistic) StdDev() float64 {
+	return l.S * math.Pi / sqrt3
+}
+
+// Survival returns the survival function (complementary CDF) at x.
+func (l Logistic) Survival(x float64) float64 {
+	return 1 - l.CDF(x)
+}
+
+// Variance returns the variance of the probability distribution.
+func (l Logistic) Variance() float64 {
+	return l.S * l.S * math.Pi * math.Pi / 3
+}
diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/lognormal.go b/vendor/gonum.org/v1/gonum/stat/distuv/lognormal.go
new file mode 100644
index 0000000000..321c1bd30e
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/distuv/lognormal.go
@@ -0,0 +1,113 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distuv
+
+import (
+	"math"
+	"math/rand/v2"
+)
+
+// LogNormal represents a random variable whose log is normally distributed.
+// The probability density function is given by
+//
+//	1/(x σ √2π) exp(-(ln(x)-μ)^2/(2σ^2))
+type LogNormal struct {
+	Mu    float64
+	Sigma float64
+	Src   rand.Source
+}
+
+// CDF computes the value of the cumulative density function at x.
+func (l LogNormal) CDF(x float64) float64 {
+	return 0.5 * math.Erfc(-(math.Log(x)-l.Mu)/(math.Sqrt2*l.Sigma))
+}
+
+// Entropy returns the differential entropy of the distribution.
+func (l LogNormal) Entropy() float64 {
+	return 0.5 + 0.5*math.Log(2*math.Pi*l.Sigma*l.Sigma) + l.Mu
+}
+
+// ExKurtosis returns the excess kurtosis of the distribution.
+func (l LogNormal) ExKurtosis() float64 {
+	s2 := l.Sigma * l.Sigma
+	return math.Exp(4*s2) + 2*math.Exp(3*s2) + 3*math.Exp(2*s2) - 6
+}
+
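A consistency sketch for the Logistic density above: LogProb and Prob must agree for any parameterization, not only the standard one (Mu = 0, S = 1).

	package main

	import (
		"fmt"
		"math"

		"gonum.org/v1/gonum/stat/distuv"
	)

	func main() {
		l := distuv.Logistic{Mu: -1, S: 2}
		for _, x := range []float64{-3, 0.5, 4} {
			// Both printed values should match to within rounding.
			fmt.Println(l.LogProb(x), math.Log(l.Prob(x)))
		}
	}

+// LogProb computes the natural logarithm of the value of the probability density function at x.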
+func (l LogNormal) LogProb(x float64) float64 { + if x < 0 { + return math.Inf(-1) + } + logx := math.Log(x) + normdiff := (logx - l.Mu) / l.Sigma + return -0.5*normdiff*normdiff - logx - math.Log(l.Sigma) - logRoot2Pi +} + +// Mean returns the mean of the probability distribution. +func (l LogNormal) Mean() float64 { + return math.Exp(l.Mu + 0.5*l.Sigma*l.Sigma) +} + +// Median returns the median of the probability distribution. +func (l LogNormal) Median() float64 { + return math.Exp(l.Mu) +} + +// Mode returns the mode of the probability distribution. +func (l LogNormal) Mode() float64 { + return math.Exp(l.Mu - l.Sigma*l.Sigma) +} + +// NumParameters returns the number of parameters in the distribution. +func (LogNormal) NumParameters() int { + return 2 +} + +// Prob computes the value of the probability density function at x. +func (l LogNormal) Prob(x float64) float64 { + return math.Exp(l.LogProb(x)) +} + +// Quantile returns the inverse of the cumulative probability distribution. +func (l LogNormal) Quantile(p float64) float64 { + if p < 0 || p > 1 { + panic(badPercentile) + } + // Formula from http://www.math.uah.edu/stat/special/LogNormal.html. + return math.Exp(l.Mu + l.Sigma*UnitNormal.Quantile(p)) +} + +// Rand returns a random sample drawn from the distribution. +func (l LogNormal) Rand() float64 { + var rnd float64 + if l.Src == nil { + rnd = rand.NormFloat64() + } else { + rnd = rand.New(l.Src).NormFloat64() + } + return math.Exp(rnd*l.Sigma + l.Mu) +} + +// Skewness returns the skewness of the distribution. +func (l LogNormal) Skewness() float64 { + s2 := l.Sigma * l.Sigma + return (math.Exp(s2) + 2) * math.Sqrt(math.Exp(s2)-1) +} + +// StdDev returns the standard deviation of the probability distribution. +func (l LogNormal) StdDev() float64 { + return math.Sqrt(l.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (l LogNormal) Survival(x float64) float64 { + return 0.5 * (1 - math.Erf((math.Log(x)-l.Mu)/(math.Sqrt2*l.Sigma))) +} + +// Variance returns the variance of the probability distribution. +func (l LogNormal) Variance() float64 { + s2 := l.Sigma * l.Sigma + return (math.Exp(s2) - 1) * math.Exp(2*l.Mu+s2) +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/norm.go b/vendor/gonum.org/v1/gonum/stat/distuv/norm.go new file mode 100644 index 0000000000..324b70eccd --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/norm.go @@ -0,0 +1,263 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/mathext" + "gonum.org/v1/gonum/stat" +) + +// UnitNormal is an instantiation of the normal distribution with Mu = 0 and Sigma = 1. +var UnitNormal = Normal{Mu: 0, Sigma: 1} + +// Normal represents a normal (Gaussian) distribution (https://en.wikipedia.org/wiki/Normal_distribution). +type Normal struct { + Mu float64 // Mean of the normal distribution + Sigma float64 // Standard deviation of the normal distribution + Src rand.Source + + // Needs to be Mu and Sigma and not Mean and StdDev because Normal has functions + // Mean and StdDev +} + +// CDF computes the value of the cumulative density function at x. 
+func (n Normal) CDF(x float64) float64 { + return 0.5 * math.Erfc(-(x-n.Mu)/(n.Sigma*math.Sqrt2)) +} + +// ConjugateUpdate updates the parameters of the distribution from the sufficient +// statistics of a set of samples. The sufficient statistics, suffStat, have been +// observed with nSamples observations. The prior values of the distribution are those +// currently in the distribution, and have been observed with priorStrength samples. +// +// For the normal distribution, the sufficient statistics are the mean and +// uncorrected standard deviation of the samples. +// The prior is having seen strength[0] samples with mean Normal.Mu +// and strength[1] samples with standard deviation Normal.Sigma. As a result of +// this function, Normal.Mu and Normal.Sigma are updated based on the weighted +// samples, and strength is modified to include the new number of samples observed. +// +// This function panics if len(suffStat) != n.NumSuffStat() or +// len(priorStrength) != n.NumSuffStat(). +func (n *Normal) ConjugateUpdate(suffStat []float64, nSamples float64, priorStrength []float64) { + // TODO: Support prior strength with math.Inf(1) to allow updating with + // a known mean/standard deviation + if len(suffStat) != n.NumSuffStat() { + panic("norm: incorrect suffStat length") + } + if len(priorStrength) != n.NumSuffStat() { + panic("norm: incorrect priorStrength length") + } + + totalMeanSamples := nSamples + priorStrength[0] + totalSum := suffStat[0]*nSamples + n.Mu*priorStrength[0] + + totalVarianceSamples := nSamples + priorStrength[1] + // sample variance + totalVariance := nSamples * suffStat[1] * suffStat[1] + // add prior variance + totalVariance += priorStrength[1] * n.Sigma * n.Sigma + // add cross variance from the difference of the means + meanDiff := (suffStat[0] - n.Mu) + totalVariance += priorStrength[0] * nSamples * meanDiff * meanDiff / totalMeanSamples + + n.Mu = totalSum / totalMeanSamples + n.Sigma = math.Sqrt(totalVariance / totalVarianceSamples) + floats.AddConst(nSamples, priorStrength) +} + +// Entropy returns the differential entropy of the distribution. +func (n Normal) Entropy() float64 { + return 0.5 * (log2Pi + 1 + 2*math.Log(n.Sigma)) +} + +// ExKurtosis returns the excess kurtosis of the distribution. +func (Normal) ExKurtosis() float64 { + return 0 +} + +// Fit sets the parameters of the probability distribution from the +// data samples x with relative weights w. If weights is nil, then all the weights +// are 1. If weights is not nil, then the len(weights) must equal len(samples). +func (n *Normal) Fit(samples, weights []float64) { + suffStat := make([]float64, n.NumSuffStat()) + nSamples := n.SuffStat(suffStat, samples, weights) + n.ConjugateUpdate(suffStat, nSamples, make([]float64, n.NumSuffStat())) +} + +// LogProb computes the natural logarithm of the value of the probability density function at x. +func (n Normal) LogProb(x float64) float64 { + return negLogRoot2Pi - math.Log(n.Sigma) - (x-n.Mu)*(x-n.Mu)/(2*n.Sigma*n.Sigma) +} + +// Mean returns the mean of the probability distribution. +func (n Normal) Mean() float64 { + return n.Mu +} + +// Median returns the median of the normal distribution. +func (n Normal) Median() float64 { + return n.Mu +} + +// Mode returns the mode of the normal distribution. +func (n Normal) Mode() float64 { + return n.Mu +} + +// NumParameters returns the number of parameters in the distribution. 
+func (Normal) NumParameters() int { + return 2 +} + +// NumSuffStat returns the number of sufficient statistics for the distribution. +func (Normal) NumSuffStat() int { + return 2 +} + +// Prob computes the value of the probability density function at x. +func (n Normal) Prob(x float64) float64 { + return math.Exp(n.LogProb(x)) +} + +// Quantile returns the inverse of the cumulative probability distribution. +func (n Normal) Quantile(p float64) float64 { + if p < 0 || p > 1 { + panic(badPercentile) + } + return n.Mu + n.Sigma*mathext.NormalQuantile(p) +} + +// Rand returns a random sample drawn from the distribution. +func (n Normal) Rand() float64 { + var rnd float64 + if n.Src == nil { + rnd = rand.NormFloat64() + } else { + rnd = rand.New(n.Src).NormFloat64() + } + return rnd*n.Sigma + n.Mu +} + +// Score returns the score function with respect to the parameters of the +// distribution at the input location x. The score function is the derivative +// of the log-likelihood at x with respect to the parameters +// +// (∂/∂θ) log(p(x;θ)) +// +// If deriv is non-nil, len(deriv) must equal the number of parameters otherwise +// Score will panic, and the derivative is stored in-place into deriv. If deriv +// is nil a new slice will be allocated and returned. +// +// The order is [∂LogProb / ∂Mu, ∂LogProb / ∂Sigma]. +// +// For more information, see https://en.wikipedia.org/wiki/Score_%28statistics%29. +func (n Normal) Score(deriv []float64, x float64) []float64 { + if deriv == nil { + deriv = make([]float64, n.NumParameters()) + } + if len(deriv) != n.NumParameters() { + panic(badLength) + } + deriv[0] = (x - n.Mu) / (n.Sigma * n.Sigma) + deriv[1] = 1 / n.Sigma * (-1 + ((x-n.Mu)/n.Sigma)*((x-n.Mu)/n.Sigma)) + return deriv +} + +// ScoreInput returns the score function with respect to the input of the +// distribution at the input location specified by x. The score function is the +// derivative of the log-likelihood +// +// (d/dx) log(p(x)) . +func (n Normal) ScoreInput(x float64) float64 { + return -(1 / (2 * n.Sigma * n.Sigma)) * 2 * (x - n.Mu) +} + +// Skewness returns the skewness of the distribution. +func (Normal) Skewness() float64 { + return 0 +} + +// StdDev returns the standard deviation of the probability distribution. +func (n Normal) StdDev() float64 { + return n.Sigma +} + +// SuffStat computes the sufficient statistics of a set of samples to update +// the distribution. The sufficient statistics are stored in place, and the +// effective number of samples are returned. +// +// The normal distribution has two sufficient statistics, the mean of the samples +// and the standard deviation of the samples. +// +// If weights is nil, the weights are assumed to be 1, otherwise panics if +// len(samples) != len(weights). Panics if len(suffStat) != NumSuffStat(). +func (Normal) SuffStat(suffStat, samples, weights []float64) (nSamples float64) { + lenSamp := len(samples) + if len(weights) != 0 && len(samples) != len(weights) { + panic(badLength) + } + if len(suffStat) != (Normal{}).NumSuffStat() { + panic(badSuffStat) + } + + if len(weights) == 0 { + nSamples = float64(lenSamp) + } else { + nSamples = floats.Sum(weights) + } + + mean := stat.Mean(samples, weights) + suffStat[0] = mean + + // Use Moment and not StdDev because we want it to be uncorrected + variance := stat.MomentAbout(2, samples, mean, weights) + suffStat[1] = math.Sqrt(variance) + return nSamples +} + +// Survival returns the survival function (complementary CDF) at x. 
+func (n Normal) Survival(x float64) float64 {
+	return 0.5 * (1 - math.Erf((x-n.Mu)/(n.Sigma*math.Sqrt2)))
+}
+
+// setParameters modifies the parameters of the distribution.
+func (n *Normal) setParameters(p []Parameter) {
+	if len(p) != n.NumParameters() {
+		panic("normal: incorrect number of parameters to set")
+	}
+	if p[0].Name != "Mu" {
+		panic("normal: " + panicNameMismatch)
+	}
+	if p[1].Name != "Sigma" {
+		panic("normal: " + panicNameMismatch)
+	}
+	n.Mu = p[0].Value
+	n.Sigma = p[1].Value
+}
+
+// Variance returns the variance of the probability distribution.
+func (n Normal) Variance() float64 {
+	return n.Sigma * n.Sigma
+}
+
+// parameters returns the parameters of the distribution.
+func (n Normal) parameters(p []Parameter) []Parameter {
+	nParam := n.NumParameters()
+	if p == nil {
+		p = make([]Parameter, nParam)
+	} else if len(p) != nParam {
+		panic("normal: improper parameter length")
+	}
+	p[0].Name = "Mu"
+	p[0].Value = n.Mu
+	p[1].Name = "Sigma"
+	p[1].Value = n.Sigma
+	return p
+}
diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/pareto.go b/vendor/gonum.org/v1/gonum/stat/distuv/pareto.go
new file mode 100644
index 0000000000..6ec751eaf2
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/distuv/pareto.go
@@ -0,0 +1,130 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distuv
+
+import (
+	"math"
+	"math/rand/v2"
+)
+
+// Pareto implements the Pareto (Type I) distribution, a two-parameter distribution
+// with support above the scale parameter.
+//
+// The density function is given by
+//
+//	(α x_m^{α})/(x^{α+1}) for x >= x_m.
+//
+// For more information, see https://en.wikipedia.org/wiki/Pareto_distribution.
+type Pareto struct {
+	// Xm is the scale parameter.
+	// Xm must be greater than 0.
+	Xm float64
+
+	// Alpha is the shape parameter.
+	// Alpha must be greater than 0.
+	Alpha float64
+
+	Src rand.Source
+}
+
+// CDF computes the value of the cumulative density function at x.
+func (p Pareto) CDF(x float64) float64 {
+	if x < p.Xm {
+		return 0
+	}
+	return -math.Expm1(p.Alpha * math.Log(p.Xm/x))
+}
+
+// Entropy returns the differential entropy of the distribution.
+func (p Pareto) Entropy() float64 {
+	return math.Log(p.Xm) - math.Log(p.Alpha) + (1 + 1/p.Alpha)
+}
+
+// ExKurtosis returns the excess kurtosis of the distribution.
+func (p Pareto) ExKurtosis() float64 {
+	if p.Alpha <= 4 {
+		return math.NaN()
+	}
+	return 6 * (p.Alpha*p.Alpha*p.Alpha + p.Alpha*p.Alpha - 6*p.Alpha - 2) / (p.Alpha * (p.Alpha - 3) * (p.Alpha - 4))
+}
+
+// LogProb computes the natural logarithm of the value of the probability
+// density function at x.
+func (p Pareto) LogProb(x float64) float64 {
+	if x < p.Xm {
+		return math.Inf(-1)
+	}
+	return math.Log(p.Alpha) + p.Alpha*math.Log(p.Xm) - (p.Alpha+1)*math.Log(x)
+}
+
+// Mean returns the mean of the probability distribution.
+func (p Pareto) Mean() float64 {
+	if p.Alpha <= 1 {
+		return math.Inf(1)
+	}
+	return p.Alpha * p.Xm / (p.Alpha - 1)
+}
+
+// Median returns the median of the Pareto distribution.
+func (p Pareto) Median() float64 {
+	return p.Quantile(0.5)
+}
+
+// Mode returns the mode of the distribution.
+func (p Pareto) Mode() float64 {
+	return p.Xm
+}
+
+// NumParameters returns the number of parameters in the distribution.
+func (p Pareto) NumParameters() int {
+	return 2
+}
+
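Pareto.Rand below is inverse-transform sampling in disguise: with E ~ Exp(1), substituting E = -log(1-U) turns Xm*exp(E/Alpha) into the quantile function Xm/(1-U)^(1/Alpha). A short sketch:

	package main

	import (
		"fmt"
		"math"

		"gonum.org/v1/gonum/stat/distuv"
	)

	func main() {
		p := distuv.Pareto{Xm: 1, Alpha: 3}
		u := 0.9
		fmt.Println(p.Quantile(u))                            // 10^(1/3) ≈ 2.154
		fmt.Println(p.Xm * math.Exp(-math.Log1p(-u)/p.Alpha)) // same value
	}

+// Prob computes the value of the probability density function at x.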
+func (p Pareto) Prob(x float64) float64 {
+	return math.Exp(p.LogProb(x))
+}
+
+// Quantile returns the inverse of the cumulative probability distribution.
+func (p Pareto) Quantile(prob float64) float64 {
+	if prob < 0 || 1 < prob {
+		panic(badPercentile)
+	}
+	return p.Xm / math.Pow(1-prob, 1/p.Alpha)
+}
+
+// Rand returns a random sample drawn from the distribution.
+func (p Pareto) Rand() float64 {
+	var rnd float64
+	if p.Src == nil {
+		rnd = rand.ExpFloat64()
+	} else {
+		rnd = rand.New(p.Src).ExpFloat64()
+	}
+	return p.Xm * math.Exp(rnd/p.Alpha)
+}
+
+// StdDev returns the standard deviation of the probability distribution.
+func (p Pareto) StdDev() float64 {
+	return math.Sqrt(p.Variance())
+}
+
+// Survival returns the survival function (complementary CDF) at x.
+func (p Pareto) Survival(x float64) float64 {
+	if x < p.Xm {
+		return 1
+	}
+	return math.Pow(p.Xm/x, p.Alpha)
+}
+
+// Variance returns the variance of the probability distribution.
+func (p Pareto) Variance() float64 {
+	if p.Alpha <= 2 {
+		return math.Inf(1)
+	}
+	am1 := p.Alpha - 1
+	return p.Xm * p.Xm * p.Alpha / (am1 * am1 * (p.Alpha - 2))
+}
diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/poisson.go b/vendor/gonum.org/v1/gonum/stat/distuv/poisson.go
new file mode 100644
index 0000000000..fcc8968ca7
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/distuv/poisson.go
@@ -0,0 +1,144 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distuv
+
+import (
+	"math"
+	"math/rand/v2"
+
+	"gonum.org/v1/gonum/mathext"
+)
+
+// Poisson implements the Poisson distribution, a discrete probability distribution
+// that expresses the probability of a given number of events occurring in a fixed
+// interval.
+// The Poisson distribution has density function:
+//
+//	f(k) = λ^k * e^(-λ) / k!
+//
+// For more information, see https://en.wikipedia.org/wiki/Poisson_distribution.
+type Poisson struct {
+	// Lambda is the average number of events in an interval.
+	// Lambda must be greater than 0.
+	Lambda float64
+
+	Src rand.Source
+}
+
+// CDF computes the value of the cumulative distribution function at x.
+func (p Poisson) CDF(x float64) float64 {
+	if x < 0 {
+		return 0
+	}
+	return mathext.GammaIncRegComp(math.Floor(x+1), p.Lambda)
+}
+
+// ExKurtosis returns the excess kurtosis of the distribution.
+func (p Poisson) ExKurtosis() float64 {
+	return 1 / p.Lambda
+}
+
+// LogProb computes the natural logarithm of the value of the probability
+// density function at x.
+func (p Poisson) LogProb(x float64) float64 {
+	if x < 0 || math.Floor(x) != x {
+		return math.Inf(-1)
+	}
+	lg, _ := math.Lgamma(math.Floor(x) + 1)
+	return x*math.Log(p.Lambda) - p.Lambda - lg
+}
+
+// Mean returns the mean of the probability distribution.
+func (p Poisson) Mean() float64 {
+	return p.Lambda
+}
+
+// NumParameters returns the number of parameters in the distribution.
+func (Poisson) NumParameters() int {
+	return 1
+}
+
+// Prob computes the value of the probability density function at x.
+func (p Poisson) Prob(x float64) float64 {
+	return math.Exp(p.LogProb(x))
+}
+
+// Rand returns a random sample drawn from the distribution.
+func (p Poisson) Rand() float64 {
+	// NUMERICAL RECIPES IN C: THE ART OF SCIENTIFIC COMPUTING (ISBN 0-521-43108-5)
+	// p. 294
294 + // + + rnd := rand.ExpFloat64 + var rng *rand.Rand + if p.Src != nil { + rng = rand.New(p.Src) + rnd = rng.ExpFloat64 + } + + if p.Lambda < 10.0 { + // Use direct method. + var em float64 + t := 0.0 + for { + t += rnd() + if t >= p.Lambda { + break + } + em++ + } + return em + } + // Generate using: + // W. Hörmann. "The transformed rejection method for generating Poisson + // random variables." Insurance: Mathematics and Economics + // 12.1 (1993): 39-45. + + // Algorithm PTRS + rnd = rand.Float64 + if rng != nil { + rnd = rng.Float64 + } + b := 0.931 + 2.53*math.Sqrt(p.Lambda) + a := -0.059 + 0.02483*b + invalpha := 1.1239 + 1.1328/(b-3.4) + vr := 0.9277 - 3.6224/(b-2) + for { + U := rnd() - 0.5 + V := rnd() + us := 0.5 - math.Abs(U) + k := math.Floor((2*a/us+b)*U + p.Lambda + 0.43) + if us >= 0.07 && V <= vr { + return k + } + if k <= 0 || (us < 0.013 && V > us) { + continue + } + lg, _ := math.Lgamma(k + 1) + if math.Log(V*invalpha/(a/(us*us)+b)) <= k*math.Log(p.Lambda)-p.Lambda-lg { + return k + } + } +} + +// Skewness returns the skewness of the distribution. +func (p Poisson) Skewness() float64 { + return 1 / math.Sqrt(p.Lambda) +} + +// StdDev returns the standard deviation of the probability distribution. +func (p Poisson) StdDev() float64 { + return math.Sqrt(p.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (p Poisson) Survival(x float64) float64 { + return 1 - p.CDF(x) +} + +// Variance returns the variance of the probability distribution. +func (p Poisson) Variance() float64 { + return p.Lambda +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/statdist.go b/vendor/gonum.org/v1/gonum/stat/distuv/statdist.go new file mode 100644 index 0000000000..bf333db1be --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/statdist.go @@ -0,0 +1,142 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + + "gonum.org/v1/gonum/mathext" +) + +// Bhattacharyya is a type for computing the Bhattacharyya distance between +// probability distributions. +// +// The Bhattacharyya distance is defined as +// +// D_B = -ln(BC(l,r)) +// BC = \int_-∞^∞ (p(x)q(x))^(1/2) dx +// +// Where BC is known as the Bhattacharyya coefficient. +// The Bhattacharyya distance is related to the Hellinger distance by +// +// H(l,r) = sqrt(1-BC(l,r)) +// +// For more information, see +// +// https://en.wikipedia.org/wiki/Bhattacharyya_distance +type Bhattacharyya struct{} + +// DistBeta returns the Bhattacharyya distance between Beta distributions l and r. +// For Beta distributions, the Bhattacharyya distance is given by +// +// -ln(B((α_l + α_r)/2, (β_l + β_r)/2) / (B(α_l,β_l), B(α_r,β_r))) +// +// Where B is the Beta function. +func (Bhattacharyya) DistBeta(l, r Beta) float64 { + // Reference: https://en.wikipedia.org/wiki/Hellinger_distance#Examples + return -mathext.Lbeta((l.Alpha+r.Alpha)/2, (l.Beta+r.Beta)/2) + + 0.5*mathext.Lbeta(l.Alpha, l.Beta) + 0.5*mathext.Lbeta(r.Alpha, r.Beta) +} + +// DistNormal returns the Bhattacharyya distance Normal distributions l and r. 
+// For Normal distributions, the Bhattacharyya distance is given by
+//
+//	s = (σ_l^2 + σ_r^2)/2
+//	D_B = 1/8 (μ_l-μ_r)^2/s + 1/2 ln(s/(σ_l*σ_r))
+func (Bhattacharyya) DistNormal(l, r Normal) float64 {
+	// Reference: https://en.wikipedia.org/wiki/Bhattacharyya_distance
+	m := l.Mu - r.Mu
+	s := (l.Sigma*l.Sigma + r.Sigma*r.Sigma) / 2
+	return 0.125*m*m/s + 0.5*math.Log(s) - 0.5*math.Log(l.Sigma) - 0.5*math.Log(r.Sigma)
+}
+
+// Hellinger is a type for computing the Hellinger distance between probability
+// distributions.
+//
+// The Hellinger distance is defined as
+//
+//	H^2(l,r) = 1/2 * \int_x (\sqrt(l(x)) - \sqrt(r(x)))^2 dx
+//
+// and is bounded between 0 and 1. Note the above formula defines the squared
+// Hellinger distance, while this returns the Hellinger distance itself.
+// The Hellinger distance is related to the Bhattacharyya distance by
+//
+//	H^2 = 1 - exp(-D_B)
+//
+// For more information, see
+//
+//	https://en.wikipedia.org/wiki/Hellinger_distance
+type Hellinger struct{}
+
+// DistBeta computes the Hellinger distance between Beta distributions l and r.
+// See the documentation of Bhattacharyya.DistBeta for the distance formula.
+func (Hellinger) DistBeta(l, r Beta) float64 {
+	db := Bhattacharyya{}.DistBeta(l, r)
+	return math.Sqrt(-math.Expm1(-db))
+}
+
+// DistNormal computes the Hellinger distance between Normal distributions l and r.
+// See the documentation of Bhattacharyya.DistNormal for the distance formula.
+func (Hellinger) DistNormal(l, r Normal) float64 {
+	db := Bhattacharyya{}.DistNormal(l, r)
+	return math.Sqrt(-math.Expm1(-db))
+}
+
+// KullbackLeibler is a type for computing the Kullback-Leibler divergence from l to r.
+//
+// The Kullback-Leibler divergence is defined as
+//
+//	D_KL(l || r) = \int_x l(x) log(l(x)/r(x)) dx
+//
+// Note that the Kullback-Leibler divergence is not symmetric with respect to
+// the order of the input arguments.
+type KullbackLeibler struct{}
+
+// DistBeta returns the Kullback-Leibler divergence between Beta distributions
+// l and r.
+//
+// For two Beta distributions, the KL divergence is computed as
+//
+//	D_KL(l || r) = log Γ(α_l+β_l) - log Γ(α_l) - log Γ(β_l)
+//	   - log Γ(α_r+β_r) + log Γ(α_r) + log Γ(β_r)
+//	   + (α_l-α_r)(ψ(α_l)-ψ(α_l+β_l)) + (β_l-β_r)(ψ(β_l)-ψ(α_l+β_l))
+//
+// Where Γ is the gamma function and ψ is the digamma function.
+func (KullbackLeibler) DistBeta(l, r Beta) float64 {
+	// http://bariskurt.com/kullback-leibler-divergence-between-two-dirichlet-and-beta-distributions/
+	if l.Alpha <= 0 || l.Beta <= 0 {
+		panic("distuv: bad parameters for left distribution")
+	}
+	if r.Alpha <= 0 || r.Beta <= 0 {
+		panic("distuv: bad parameters for right distribution")
+	}
+	lab := l.Alpha + l.Beta
+	l1, _ := math.Lgamma(lab)
+	l2, _ := math.Lgamma(l.Alpha)
+	l3, _ := math.Lgamma(l.Beta)
+	lt := l1 - l2 - l3
+
+	r1, _ := math.Lgamma(r.Alpha + r.Beta)
+	r2, _ := math.Lgamma(r.Alpha)
+	r3, _ := math.Lgamma(r.Beta)
+	rt := r1 - r2 - r3
+
+	d0 := mathext.Digamma(l.Alpha + l.Beta)
+	ct := (l.Alpha-r.Alpha)*(mathext.Digamma(l.Alpha)-d0) + (l.Beta-r.Beta)*(mathext.Digamma(l.Beta)-d0)
+
+	return lt - rt + ct
+}
+
+// DistNormal returns the Kullback-Leibler divergence between Normal distributions
+// l and r.
+//
+// For two Normal distributions, the KL divergence is computed as
+//
+//	D_KL(l || r) = log(σ_r / σ_l) + (σ_l^2 + (μ_l-μ_r)^2)/(2 * σ_r^2) - 0.5
+func (KullbackLeibler) DistNormal(l, r Normal) float64 {
+	d := l.Mu - r.Mu
+	v := (l.Sigma*l.Sigma + d*d) / (2 * r.Sigma * r.Sigma)
+	return math.Log(r.Sigma) - math.Log(l.Sigma) + v - 0.5
+}
diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/studentst.go b/vendor/gonum.org/v1/gonum/stat/distuv/studentst.go
new file mode 100644
index 0000000000..3bbb1526be
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/distuv/studentst.go
@@ -0,0 +1,161 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distuv
+
+import (
+	"math"
+	"math/rand/v2"
+
+	"gonum.org/v1/gonum/mathext"
+)
+
+const logPi = 1.1447298858494001741 // http://oeis.org/A053510
+
+// StudentsT implements the three-parameter Student's T distribution, a distribution
+// over the real numbers.
+//
+// The Student's T distribution has density function
+//
+//	Γ((ν+1)/2) / (sqrt(νπ) Γ(ν/2) σ) (1 + 1/ν * ((x-μ)/σ)^2)^(-(ν+1)/2)
+//
+// The Student's T distribution approaches the normal distribution as ν → ∞.
+//
+// For more information, see https://en.wikipedia.org/wiki/Student%27s_t-distribution,
+// specifically https://en.wikipedia.org/wiki/Student%27s_t-distribution#Non-standardized_Student.27s_t-distribution .
+//
+// The standard Student's T distribution has Mu = 0 and Sigma = 1.
+type StudentsT struct {
+	// Mu is the location parameter of the distribution, and the mean of the
+	// distribution.
+	Mu float64
+
+	// Sigma is the scale parameter of the distribution. It is related to the
+	// standard deviation by std = Sigma * sqrt(Nu/(Nu-2)).
+	Sigma float64
+
+	// Nu is the shape parameter of the distribution, representing the degrees
+	// of freedom, and is one less than the number of observations from a
+	// Normal distribution.
+	Nu float64
+
+	Src rand.Source
+}
+
+// CDF computes the value of the cumulative distribution function at x.
+func (s StudentsT) CDF(x float64) float64 {
+	// Transform to the standardized variable.
+	y := (x - s.Mu) / s.Sigma
+	if y == 0 {
+		return 0.5
+	}
+	// For y > 0
+	//	F(y) = 1 - 0.5 * I_t(y)(nu/2, 1/2)
+	//	t(y) = nu/(y^2 + nu)
+	// and 1 - F(y) for y < 0.
+	t := s.Nu / (y*y + s.Nu)
+	if y > 0 {
+		return 1 - 0.5*mathext.RegIncBeta(0.5*s.Nu, 0.5, t)
+	}
+	return 0.5 * mathext.RegIncBeta(s.Nu/2, 0.5, t)
+}
+
+// LogProb computes the natural logarithm of the value of the probability
+// density function at x.
+func (s StudentsT) LogProb(x float64) float64 {
+	g1, _ := math.Lgamma((s.Nu + 1) / 2)
+	g2, _ := math.Lgamma(s.Nu / 2)
+	z := (x - s.Mu) / s.Sigma
+	return g1 - g2 - 0.5*math.Log(s.Nu) - 0.5*logPi - math.Log(s.Sigma) - ((s.Nu+1)/2)*math.Log(1+z*z/s.Nu)
+}
+
+// Mean returns the mean of the probability distribution.
+func (s StudentsT) Mean() float64 {
+	return s.Mu
+}
+
+// Mode returns the mode of the distribution.
+func (s StudentsT) Mode() float64 {
+	return s.Mu
+}
+
+// NumParameters returns the number of parameters in the distribution.
+func (StudentsT) NumParameters() int {
+	return 3
+}
+
+// Prob computes the value of the probability density function at x.
+func (s StudentsT) Prob(x float64) float64 {
+	return math.Exp(s.LogProb(x))
+}
+
+// Quantile returns the inverse of the cumulative distribution function.
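+// Quantile panics if p is not in [0, 1]. Quantile(0.5) returns Mu exactly.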
+func (s StudentsT) Quantile(p float64) float64 {
+	if p < 0 || p > 1 {
+		panic(badPercentile)
+	}
+	// F(x) = 1 - 0.5 * I_t(x)(nu/2, 1/2)
+	// t(x) = nu/(x^2 + nu)
+	if p == 0.5 {
+		return s.Mu
+	}
+	var y float64
+	if p > 0.5 {
+		// Know t > 0
+		t := mathext.InvRegIncBeta(s.Nu/2, 0.5, 2*(1-p))
+		y = math.Sqrt(s.Nu * (1 - t) / t)
+	} else {
+		t := mathext.InvRegIncBeta(s.Nu/2, 0.5, 2*p)
+		y = -math.Sqrt(s.Nu * (1 - t) / t)
+	}
+	// Transform back from the standardized variable.
+	return y*s.Sigma + s.Mu
+}
+
+// Rand returns a random sample drawn from the distribution.
+func (s StudentsT) Rand() float64 {
+	// http://www.math.uah.edu/stat/special/Student.html
+	n := Normal{0, 1, s.Src}.Rand()
+	c := Gamma{s.Nu / 2, 0.5, s.Src}.Rand()
+	z := n / math.Sqrt(c/s.Nu)
+	return z*s.Sigma + s.Mu
+}
+
+// StdDev returns the standard deviation of the probability distribution.
+//
+// The standard deviation is undefined for ν <= 1 (math.NaN() is returned)
+// and infinite for 1 < ν <= 2 (math.Inf(1) is returned).
+func (s StudentsT) StdDev() float64 {
+	return math.Sqrt(s.Variance())
+}
+
+// Survival returns the survival function (complementary CDF) at x.
+func (s StudentsT) Survival(x float64) float64 {
+	// Transform to the standardized variable.
+	y := (x - s.Mu) / s.Sigma
+	if y == 0 {
+		return 0.5
+	}
+	// For y > 0
+	//	F(y) = 1 - 0.5 * I_t(y)(nu/2, 1/2)
+	//	t(y) = nu/(y^2 + nu)
+	// and 1 - F(y) for y < 0.
+	t := s.Nu / (y*y + s.Nu)
+	if y > 0 {
+		return 0.5 * mathext.RegIncBeta(s.Nu/2, 0.5, t)
+	}
+	return 1 - 0.5*mathext.RegIncBeta(s.Nu/2, 0.5, t)
+}
+
+// Variance returns the variance of the probability distribution.
+//
+// The variance is undefined for ν <= 1 (math.NaN() is returned) and infinite
+// for 1 < ν <= 2 (math.Inf(1) is returned).
+func (s StudentsT) Variance() float64 {
+	if s.Nu <= 1 {
+		return math.NaN()
+	}
+	if s.Nu <= 2 {
+		return math.Inf(1)
+	}
+	return s.Sigma * s.Sigma * s.Nu / (s.Nu - 2)
+}
diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/triangle.go b/vendor/gonum.org/v1/gonum/stat/distuv/triangle.go
new file mode 100644
index 0000000000..20240bbe4a
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/distuv/triangle.go
@@ -0,0 +1,278 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distuv
+
+import (
+	"math"
+	"math/rand/v2"
+)
+
+// Triangle represents a triangle distribution (https://en.wikipedia.org/wiki/Triangular_distribution).
+type Triangle struct {
+	a, b, c float64
+	src     rand.Source
+}
+
+// NewTriangle constructs a new triangle distribution with lower limit a, upper limit b, and mode c.
+// Constraints are a < b and a ≤ c ≤ b.
+// This distribution is uncommon in nature, but may be useful for simulation.
+func NewTriangle(a, b, c float64, src rand.Source) Triangle {
+	checkTriangleParameters(a, b, c)
+	return Triangle{a: a, b: b, c: c, src: src}
+}
+
+func checkTriangleParameters(a, b, c float64) {
+	if a >= b {
+		panic("triangle: constraint of a < b violated")
+	}
+	if a > c {
+		panic("triangle: constraint of a <= c violated")
+	}
+	if c > b {
+		panic("triangle: constraint of c <= b violated")
+	}
+}
+
+// CDF computes the value of the cumulative density function at x.
+func (t Triangle) CDF(x float64) float64 {
+	switch {
+	case x <= t.a:
+		return 0
+	case x <= t.c:
+		d := x - t.a
+		return (d * d) / ((t.b - t.a) * (t.c - t.a))
+	case x < t.b:
+		d := t.b - x
+		return 1 - (d*d)/((t.b-t.a)*(t.b-t.c))
+	default:
+		return 1
+	}
+}
+
+// Entropy returns the entropy of the distribution.
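+// For the triangle distribution the entropy evaluates to 1/2 + ln((b-a)/2).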
+func (t Triangle) Entropy() float64 {
+	return 0.5 + math.Log(t.b-t.a) - math.Ln2
+}
+
+// ExKurtosis returns the excess kurtosis of the distribution.
+func (Triangle) ExKurtosis() float64 {
+	return -3.0 / 5.0
+}
+
+// Fit is not appropriate for Triangle, because the distribution is generally used when there is little data.
+
+// LogProb computes the natural logarithm of the value of the probability density function at x.
+func (t Triangle) LogProb(x float64) float64 {
+	return math.Log(t.Prob(x))
+}
+
+// Mean returns the mean of the probability distribution.
+func (t Triangle) Mean() float64 {
+	return (t.a + t.b + t.c) / 3
+}
+
+// Median returns the median of the probability distribution.
+func (t Triangle) Median() float64 {
+	if t.c >= (t.a+t.b)/2 {
+		return t.a + math.Sqrt((t.b-t.a)*(t.c-t.a)/2)
+	}
+	return t.b - math.Sqrt((t.b-t.a)*(t.b-t.c)/2)
+}
+
+// Mode returns the mode of the probability distribution.
+func (t Triangle) Mode() float64 {
+	return t.c
+}
+
+// NumParameters returns the number of parameters in the distribution.
+func (Triangle) NumParameters() int {
+	return 3
+}
+
+// Prob computes the value of the probability density function at x.
+func (t Triangle) Prob(x float64) float64 {
+	switch {
+	case x < t.a:
+		return 0
+	case x < t.c:
+		return 2 * (x - t.a) / ((t.b - t.a) * (t.c - t.a))
+	case x == t.c:
+		return 2 / (t.b - t.a)
+	case x <= t.b:
+		return 2 * (t.b - x) / ((t.b - t.a) * (t.b - t.c))
+	default:
+		return 0
+	}
+}
+
+// Quantile returns the inverse of the cumulative probability distribution.
+func (t Triangle) Quantile(p float64) float64 {
+	if p < 0 || p > 1 {
+		panic(badPercentile)
+	}
+
+	f := (t.c - t.a) / (t.b - t.a)
+
+	if p < f {
+		return t.a + math.Sqrt(p*(t.b-t.a)*(t.c-t.a))
+	}
+	return t.b - math.Sqrt((1-p)*(t.b-t.a)*(t.b-t.c))
+}
+
+// Rand returns a random sample drawn from the distribution.
+func (t Triangle) Rand() float64 {
+	var rnd float64
+	if t.src == nil {
+		rnd = rand.Float64()
+	} else {
+		rnd = rand.New(t.src).Float64()
+	}
+
+	return t.Quantile(rnd)
+}
+
+// Score returns the score function with respect to the parameters of the
+// distribution at the input location x. The score function is the derivative
+// of the log-likelihood at x with respect to the parameters
+//
+//	(∂/∂θ) log(p(x;θ))
+//
+// If deriv is non-nil, len(deriv) must equal the number of parameters otherwise
+// Score will panic, and the derivative is stored in-place into deriv. If deriv
+// is nil a new slice will be allocated and returned.
+//
+// The order is [∂LogProb / ∂a, ∂LogProb / ∂b, ∂LogProb / ∂c].
+//
+// For more information, see https://en.wikipedia.org/wiki/Score_%28statistics%29.
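+//
+// Score returns NaN for all three derivatives if x is outside [a, b]. The
+// individual derivatives are NaN at x == a, x == b, and x == c, and when the
+// mode c coincides with either endpoint.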
+func (t Triangle) Score(deriv []float64, x float64) []float64 { + if deriv == nil { + deriv = make([]float64, t.NumParameters()) + } + if len(deriv) != t.NumParameters() { + panic(badLength) + } + if (x < t.a) || (x > t.b) { + deriv[0] = math.NaN() + deriv[1] = math.NaN() + deriv[2] = math.NaN() + } else { + invBA := 1 / (t.b - t.a) + invCA := 1 / (t.c - t.a) + invBC := 1 / (t.b - t.c) + switch { + case x < t.c: + deriv[0] = -1/(x-t.a) + invBA + invCA + deriv[1] = -invBA + deriv[2] = -invCA + case x > t.c: + deriv[0] = invBA + deriv[1] = 1/(t.b-x) - invBA - invBC + deriv[2] = invBC + default: + deriv[0] = invBA + deriv[1] = -invBA + deriv[2] = 0 + } + switch { + case x == t.a: + deriv[0] = math.NaN() + case x == t.b: + deriv[1] = math.NaN() + case x == t.c: + deriv[2] = math.NaN() + } + switch { + case t.a == t.c: + deriv[0] = math.NaN() + deriv[2] = math.NaN() + case t.b == t.c: + deriv[1] = math.NaN() + deriv[2] = math.NaN() + } + } + return deriv +} + +// ScoreInput returns the score function with respect to the input of the +// distribution at the input location specified by x. The score function is the +// derivative of the log-likelihood +// +// (d/dx) log(p(x)) . +// +// Special cases (c is the mode of the distribution): +// +// ScoreInput(c) = NaN +// ScoreInput(x) = NaN for x not in (a, b) +func (t Triangle) ScoreInput(x float64) float64 { + if (x <= t.a) || (x >= t.b) || (x == t.c) { + return math.NaN() + } + if x < t.c { + return 1 / (x - t.a) + } + return 1 / (x - t.b) +} + +// Skewness returns the skewness of the distribution. +func (t Triangle) Skewness() float64 { + n := math.Sqrt2 * (t.a + t.b - 2*t.c) * (2*t.a - t.b - t.c) * (t.a - 2*t.b + t.c) + d := 5 * math.Pow(t.a*t.a+t.b*t.b+t.c*t.c-t.a*t.b-t.a*t.c-t.b*t.c, 3.0/2.0) + + return n / d +} + +// StdDev returns the standard deviation of the probability distribution. +func (t Triangle) StdDev() float64 { + return math.Sqrt(t.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (t Triangle) Survival(x float64) float64 { + return 1 - t.CDF(x) +} + +// parameters returns the parameters of the distribution. +func (t Triangle) parameters(p []Parameter) []Parameter { + nParam := t.NumParameters() + if p == nil { + p = make([]Parameter, nParam) + } else if len(p) != nParam { + panic("triangle: improper parameter length") + } + p[0].Name = "A" + p[0].Value = t.a + p[1].Name = "B" + p[1].Value = t.b + p[2].Name = "C" + p[2].Value = t.c + return p +} + +// setParameters modifies the parameters of the distribution. +func (t *Triangle) setParameters(p []Parameter) { + if len(p) != t.NumParameters() { + panic("triangle: incorrect number of parameters to set") + } + if p[0].Name != "A" { + panic("triangle: " + panicNameMismatch) + } + if p[1].Name != "B" { + panic("triangle: " + panicNameMismatch) + } + if p[2].Name != "C" { + panic("triangle: " + panicNameMismatch) + } + + checkTriangleParameters(p[0].Value, p[1].Value, p[2].Value) + + t.a = p[0].Value + t.b = p[1].Value + t.c = p[2].Value +} + +// Variance returns the variance of the probability distribution. +func (t Triangle) Variance() float64 { + return (t.a*t.a + t.b*t.b + t.c*t.c - t.a*t.b - t.a*t.c - t.b*t.c) / 18 +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/uniform.go b/vendor/gonum.org/v1/gonum/stat/distuv/uniform.go new file mode 100644 index 0000000000..3f555e335a --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/uniform.go @@ -0,0 +1,210 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distuv
+
+import (
+	"math"
+	"math/rand/v2"
+)
+
+// UnitUniform is an instantiation of the uniform distribution with Min = 0
+// and Max = 1.
+var UnitUniform = Uniform{Min: 0, Max: 1}
+
+// Uniform represents a continuous uniform distribution (https://en.wikipedia.org/wiki/Uniform_distribution_%28continuous%29).
+type Uniform struct {
+	Min float64
+	Max float64
+	Src rand.Source
+}
+
+// CDF computes the value of the cumulative density function at x.
+func (u Uniform) CDF(x float64) float64 {
+	if x < u.Min {
+		return 0
+	}
+	if x > u.Max {
+		return 1
+	}
+	return (x - u.Min) / (u.Max - u.Min)
+}
+
+// Uniform doesn't have any of the DLogProbD? methods because the derivative
+// is 0 everywhere except where it's undefined.
+
+// Entropy returns the entropy of the distribution.
+func (u Uniform) Entropy() float64 {
+	return math.Log(u.Max - u.Min)
+}
+
+// ExKurtosis returns the excess kurtosis of the distribution.
+func (Uniform) ExKurtosis() float64 {
+	return -6.0 / 5.0
+}
+
+// Uniform doesn't have Fit because it's a bad idea to fit a uniform from data.
+
+// LogProb computes the natural logarithm of the value of the probability density function at x.
+func (u Uniform) LogProb(x float64) float64 {
+	if x < u.Min {
+		return math.Inf(-1)
+	}
+	if x > u.Max {
+		return math.Inf(-1)
+	}
+	return -math.Log(u.Max - u.Min)
+}
+
+// parameters returns the parameters of the distribution.
+func (u Uniform) parameters(p []Parameter) []Parameter {
+	nParam := u.NumParameters()
+	if p == nil {
+		p = make([]Parameter, nParam)
+	} else if len(p) != nParam {
+		panic("uniform: improper parameter length")
+	}
+	p[0].Name = "Min"
+	p[0].Value = u.Min
+	p[1].Name = "Max"
+	p[1].Value = u.Max
+	return p
+}
+
+// Mean returns the mean of the probability distribution.
+func (u Uniform) Mean() float64 {
+	return (u.Max + u.Min) / 2
+}
+
+// Median returns the median of the probability distribution.
+func (u Uniform) Median() float64 {
+	return (u.Max + u.Min) / 2
+}
+
+// Uniform doesn't have a mode because every value in [Min, Max] is equally likely.
+
+// NumParameters returns the number of parameters in the distribution.
+func (Uniform) NumParameters() int {
+	return 2
+}
+
+// Prob computes the value of the probability density function at x.
+func (u Uniform) Prob(x float64) float64 {
+	if x < u.Min {
+		return 0
+	}
+	if x > u.Max {
+		return 0
+	}
+	return 1 / (u.Max - u.Min)
+}
+
+// Quantile returns the inverse of the cumulative probability distribution.
+func (u Uniform) Quantile(p float64) float64 {
+	if p < 0 || p > 1 {
+		panic(badPercentile)
+	}
+	return p*(u.Max-u.Min) + u.Min
+}
+
+// Rand returns a random sample drawn from the distribution.
+func (u Uniform) Rand() float64 {
+	var rnd float64
+	if u.Src == nil {
+		rnd = rand.Float64()
+	} else {
+		rnd = rand.New(u.Src).Float64()
+	}
+	return rnd*(u.Max-u.Min) + u.Min
+}
+
+// Score returns the score function with respect to the parameters of the
+// distribution at the input location x. The score function is the derivative
+// of the log-likelihood at x with respect to the parameters
+//
+//	(∂/∂θ) log(p(x;θ))
+//
+// If deriv is non-nil, len(deriv) must equal the number of parameters otherwise
+// Score will panic, and the derivative is stored in-place into deriv. If deriv
+// is nil a new slice will be allocated and returned.
+//
+// The order is [∂LogProb / ∂Min, ∂LogProb / ∂Max].
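+//
+// Score returns NaN for both derivatives if x is outside [Min, Max], for the
+// first derivative at x == Min, and for the second derivative at x == Max.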
+// +// For more information, see https://en.wikipedia.org/wiki/Score_%28statistics%29. +func (u Uniform) Score(deriv []float64, x float64) []float64 { + if deriv == nil { + deriv = make([]float64, u.NumParameters()) + } + if len(deriv) != u.NumParameters() { + panic(badLength) + } + if (x < u.Min) || (x > u.Max) { + deriv[0] = math.NaN() + deriv[1] = math.NaN() + } else { + deriv[0] = 1 / (u.Max - u.Min) + deriv[1] = -deriv[0] + if x == u.Min { + deriv[0] = math.NaN() + } + if x == u.Max { + deriv[1] = math.NaN() + } + } + return deriv +} + +// ScoreInput returns the score function with respect to the input of the +// distribution at the input location specified by x. The score function is the +// derivative of the log-likelihood +// +// (d/dx) log(p(x)) . +func (u Uniform) ScoreInput(x float64) float64 { + if (x <= u.Min) || (x >= u.Max) { + return math.NaN() + } + return 0 +} + +// Skewness returns the skewness of the distribution. +func (Uniform) Skewness() float64 { + return 0 +} + +// StdDev returns the standard deviation of the probability distribution. +func (u Uniform) StdDev() float64 { + return math.Sqrt(u.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (u Uniform) Survival(x float64) float64 { + if x < u.Min { + return 1 + } + if x > u.Max { + return 0 + } + return (u.Max - x) / (u.Max - u.Min) +} + +// setParameters modifies the parameters of the distribution. +func (u *Uniform) setParameters(p []Parameter) { + if len(p) != u.NumParameters() { + panic("uniform: incorrect number of parameters to set") + } + if p[0].Name != "Min" { + panic("uniform: " + panicNameMismatch) + } + if p[1].Name != "Max" { + panic("uniform: " + panicNameMismatch) + } + + u.Min = p[0].Value + u.Max = p[1].Value +} + +// Variance returns the variance of the probability distribution. +func (u Uniform) Variance() float64 { + return 1.0 / 12.0 * (u.Max - u.Min) * (u.Max - u.Min) +} diff --git a/vendor/gonum.org/v1/gonum/stat/distuv/weibull.go b/vendor/gonum.org/v1/gonum/stat/distuv/weibull.go new file mode 100644 index 0000000000..4042a8a901 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/distuv/weibull.go @@ -0,0 +1,231 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + "math/rand/v2" +) + +// Weibull distribution. Valid range for x is [0,+∞). +type Weibull struct { + // Shape parameter of the distribution. A value of 1 represents + // the exponential distribution. A value of 2 represents the + // Rayleigh distribution. Valid range is (0,+∞). + K float64 + // Scale parameter of the distribution. Valid range is (0,+∞). + Lambda float64 + // Source of random numbers + Src rand.Source +} + +// CDF computes the value of the cumulative density function at x. +func (w Weibull) CDF(x float64) float64 { + if x < 0 { + return 0 + } + return -math.Expm1(-math.Pow(x/w.Lambda, w.K)) +} + +// Entropy returns the entropy of the distribution. +func (w Weibull) Entropy() float64 { + return eulerGamma*(1-1/w.K) + math.Log(w.Lambda/w.K) + 1 +} + +// ExKurtosis returns the excess kurtosis of the distribution. 
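+// It is assembled from powers of the raw moment terms Γ(1 + i/K) for
+// i = 1, ..., 4, computed via gammaIPow below.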
+func (w Weibull) ExKurtosis() float64 {
+	return (-6*w.gammaIPow(1, 4) + 12*w.gammaIPow(1, 2)*math.Gamma(1+2/w.K) - 3*w.gammaIPow(2, 2) - 4*math.Gamma(1+1/w.K)*math.Gamma(1+3/w.K) + math.Gamma(1+4/w.K)) / math.Pow(math.Gamma(1+2/w.K)-w.gammaIPow(1, 2), 2)
+}
+
+// gammaIPow is a shortcut for computing the gamma function to a power.
+func (w Weibull) gammaIPow(i, pow float64) float64 {
+	return math.Pow(math.Gamma(1+i/w.K), pow)
+}
+
+// LogProb computes the natural logarithm of the value of the probability
+// density function at x. -Inf is returned if x is less than zero.
+//
+// Special cases occur when x == 0, and the result depends on the shape
+// parameter as follows:
+//
+//	If 0 < K < 1, LogProb returns +Inf.
+//	If K == 1, LogProb returns 0.
+//	If K > 1, LogProb returns -Inf.
+func (w Weibull) LogProb(x float64) float64 {
+	if x < 0 {
+		return math.Inf(-1)
+	}
+	if x == 0 && w.K == 1 {
+		return 0
+	}
+	return math.Log(w.K) - math.Log(w.Lambda) + (w.K-1)*(math.Log(x)-math.Log(w.Lambda)) - math.Pow(x/w.Lambda, w.K)
+}
+
+// LogSurvival returns the log of the survival function (complementary CDF) at x.
+func (w Weibull) LogSurvival(x float64) float64 {
+	if x < 0 {
+		return 0
+	}
+	return -math.Pow(x/w.Lambda, w.K)
+}
+
+// Mean returns the mean of the probability distribution.
+func (w Weibull) Mean() float64 {
+	return w.Lambda * math.Gamma(1+1/w.K)
+}
+
+// Median returns the median of the Weibull distribution.
+func (w Weibull) Median() float64 {
+	return w.Lambda * math.Pow(ln2, 1/w.K)
+}
+
+// Mode returns the mode of the Weibull distribution.
+//
+// The mode is 0 in the special case where the K (shape) parameter
+// is less than or equal to 1.
+func (w Weibull) Mode() float64 {
+	if w.K > 1 {
+		return w.Lambda * math.Pow((w.K-1)/w.K, 1/w.K)
+	}
+	return 0
+}
+
+// NumParameters returns the number of parameters in the distribution.
+func (Weibull) NumParameters() int {
+	return 2
+}
+
+// Prob computes the value of the probability density function at x.
+func (w Weibull) Prob(x float64) float64 {
+	if x < 0 {
+		return 0
+	}
+	return math.Exp(w.LogProb(x))
+}
+
+// Quantile returns the inverse of the cumulative probability distribution.
+func (w Weibull) Quantile(p float64) float64 {
+	if p < 0 || p > 1 {
+		panic(badPercentile)
+	}
+	return w.Lambda * math.Pow(-math.Log(1-p), 1/w.K)
+}
+
+// Rand returns a random sample drawn from the distribution.
+func (w Weibull) Rand() float64 {
+	var rnd float64
+	if w.Src == nil {
+		rnd = rand.Float64()
+	} else {
+		rnd = rand.New(w.Src).Float64()
+	}
+	return w.Quantile(rnd)
+}
+
+// Score returns the score function with respect to the parameters of the
+// distribution at the input location x. The score function is the derivative
+// of the log-likelihood at x with respect to the parameters
+//
+//	(∂/∂θ) log(p(x;θ))
+//
+// If deriv is non-nil, len(deriv) must equal the number of parameters otherwise
+// Score will panic, and the derivative is stored in-place into deriv. If deriv
+// is nil a new slice will be allocated and returned.
+//
+// The order is [∂LogProb / ∂K, ∂LogProb / ∂λ].
+//
+// For more information, see https://en.wikipedia.org/wiki/Score_%28statistics%29.
+// +// Special cases: +// +// Score(x) = [NaN, NaN] for x <= 0 +func (w Weibull) Score(deriv []float64, x float64) []float64 { + if deriv == nil { + deriv = make([]float64, w.NumParameters()) + } + if len(deriv) != w.NumParameters() { + panic(badLength) + } + if x > 0 { + deriv[0] = 1/w.K + math.Log(x) - math.Log(w.Lambda) - (math.Log(x)-math.Log(w.Lambda))*math.Pow(x/w.Lambda, w.K) + deriv[1] = (w.K * (math.Pow(x/w.Lambda, w.K) - 1)) / w.Lambda + return deriv + } + deriv[0] = math.NaN() + deriv[1] = math.NaN() + return deriv +} + +// ScoreInput returns the score function with respect to the input of the +// distribution at the input location specified by x. The score function is the +// derivative of the log-likelihood +// +// (d/dx) log(p(x)) . +// +// Special cases: +// +// ScoreInput(x) = NaN for x <= 0 +func (w Weibull) ScoreInput(x float64) float64 { + if x > 0 { + return (-w.K*math.Pow(x/w.Lambda, w.K) + w.K - 1) / x + } + return math.NaN() +} + +// Skewness returns the skewness of the distribution. +func (w Weibull) Skewness() float64 { + stdDev := w.StdDev() + firstGamma, firstGammaSign := math.Lgamma(1 + 3/w.K) + logFirst := firstGamma + 3*(math.Log(w.Lambda)-math.Log(stdDev)) + logSecond := math.Log(3) + math.Log(w.Mean()) + 2*math.Log(stdDev) - 3*math.Log(stdDev) + logThird := 3 * (math.Log(w.Mean()) - math.Log(stdDev)) + return float64(firstGammaSign)*math.Exp(logFirst) - math.Exp(logSecond) - math.Exp(logThird) +} + +// StdDev returns the standard deviation of the probability distribution. +func (w Weibull) StdDev() float64 { + return math.Sqrt(w.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (w Weibull) Survival(x float64) float64 { + return math.Exp(w.LogSurvival(x)) +} + +// setParameters modifies the parameters of the distribution. +func (w *Weibull) setParameters(p []Parameter) { + if len(p) != w.NumParameters() { + panic("weibull: incorrect number of parameters to set") + } + if p[0].Name != "K" { + panic("weibull: " + panicNameMismatch) + } + if p[1].Name != "λ" { + panic("weibull: " + panicNameMismatch) + } + w.K = p[0].Value + w.Lambda = p[1].Value +} + +// Variance returns the variance of the probability distribution. +func (w Weibull) Variance() float64 { + return math.Pow(w.Lambda, 2) * (math.Gamma(1+2/w.K) - w.gammaIPow(1, 2)) +} + +// parameters returns the parameters of the distribution. +func (w Weibull) parameters(p []Parameter) []Parameter { + nParam := w.NumParameters() + if p == nil { + p = make([]Parameter, nParam) + } else if len(p) != nParam { + panic("weibull: improper parameter length") + } + p[0].Name = "K" + p[0].Value = w.K + p[1].Name = "λ" + p[1].Value = w.Lambda + return p + +} diff --git a/vendor/gonum.org/v1/gonum/stat/doc.go b/vendor/gonum.org/v1/gonum/stat/doc.go new file mode 100644 index 0000000000..d6916cb252 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/doc.go @@ -0,0 +1,6 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package stat provides generalized statistical functions. +package stat // import "gonum.org/v1/gonum/stat" diff --git a/vendor/gonum.org/v1/gonum/stat/pca_cca.go b/vendor/gonum.org/v1/gonum/stat/pca_cca.go new file mode 100644 index 0000000000..1cc92ebf11 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/pca_cca.go @@ -0,0 +1,317 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package stat + +import ( + "errors" + "math" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/mat" +) + +// PC is a type for computing and extracting the principal components of a +// matrix. The results of the principal components analysis are only valid +// if the call to PrincipalComponents was successful. +type PC struct { + n, d int + weights []float64 + svd *mat.SVD + ok bool +} + +// PrincipalComponents performs a weighted principal components analysis on the +// matrix of the input data which is represented as an n×d matrix a where each +// row is an observation and each column is a variable. +// +// PrincipalComponents centers the variables but does not scale the variance. +// +// The weights slice is used to weight the observations. If weights is nil, each +// weight is considered to have a value of one, otherwise the length of weights +// must match the number of observations or PrincipalComponents will panic. +// +// PrincipalComponents returns whether the analysis was successful. +func (c *PC) PrincipalComponents(a mat.Matrix, weights []float64) (ok bool) { + c.n, c.d = a.Dims() + if weights != nil && len(weights) != c.n { + panic("stat: len(weights) != observations") + } + + c.svd, c.ok = svdFactorizeCentered(c.svd, a, weights) + if c.ok { + c.weights = append(c.weights[:0], weights...) + } + return c.ok +} + +// VectorsTo returns the component direction vectors of a principal components +// analysis. The vectors are returned in the columns of a d×min(n, d) matrix. +// +// If dst is empty, VectorsTo will resize dst to be d×min(n, d). When dst is +// non-empty, VectorsTo will panic if dst is not d×min(n, d). VectorsTo will also +// panic if the receiver does not contain a successful PC. +func (c *PC) VectorsTo(dst *mat.Dense) { + if !c.ok { + panic("stat: use of unsuccessful principal components analysis") + } + + if dst.IsEmpty() { + dst.ReuseAs(c.d, min(c.n, c.d)) + } else { + if d, n := dst.Dims(); d != c.d || n != min(c.n, c.d) { + panic(mat.ErrShape) + } + } + c.svd.VTo(dst) +} + +// VarsTo returns the column variances of the principal component scores, +// b * vecs, where b is a matrix with centered columns. Variances are returned +// in descending order. +// If dst is not nil it is used to store the variances and returned. +// Vars will panic if the receiver has not successfully performed a principal +// components analysis or dst is not nil and the length of dst is not min(n, d). +func (c *PC) VarsTo(dst []float64) []float64 { + if !c.ok { + panic("stat: use of unsuccessful principal components analysis") + } + if dst != nil && len(dst) != min(c.n, c.d) { + panic("stat: length of slice does not match analysis") + } + + dst = c.svd.Values(dst) + var f float64 + if c.weights == nil { + f = 1 / float64(c.n-1) + } else { + f = 1 / (floats.Sum(c.weights) - 1) + } + for i, v := range dst { + dst[i] = f * v * v + } + return dst +} + +// CC is a type for computing the canonical correlations of a pair of matrices. +// The results of the canonical correlation analysis are only valid +// if the call to CanonicalCorrelations was successful. +type CC struct { + // n is the number of observations used to + // construct the canonical correlations. + n int + + // xd and yd are used for size checks. 
+ xd, yd int + + x, y, c *mat.SVD + ok bool +} + +// CanonicalCorrelations performs a canonical correlation analysis of the +// input data x and y, columns of which should be interpretable as two sets +// of measurements on the same observations (rows). These observations are +// optionally weighted by weights. The result of the analysis is stored in +// the receiver if the analysis is successful. +// +// Canonical correlation analysis finds associations between two sets of +// variables on the same observations by finding linear combinations of the two +// sphered datasets that maximize the correlation between them. +// +// Some notation: let Xc and Yc denote the centered input data matrices x +// and y (column means subtracted from each column), let Sx and Sy denote the +// sample covariance matrices within x and y respectively, and let Sxy denote +// the covariance matrix between x and y. The sphered data can then be expressed +// as Xc * Sx^{-1/2} and Yc * Sy^{-1/2} respectively, and the correlation matrix +// between the sphered data is called the canonical correlation matrix, +// Sx^{-1/2} * Sxy * Sy^{-1/2}. In cases where S^{-1/2} is ambiguous for some +// covariance matrix S, S^{-1/2} is taken to be E * D^{-1/2} * Eᵀ where S can +// be eigendecomposed as S = E * D * Eᵀ. +// +// The canonical correlations are the correlations between the corresponding +// pairs of canonical variables and can be obtained with c.Corrs(). Canonical +// variables can be obtained by projecting the sphered data into the left and +// right eigenvectors of the canonical correlation matrix, and these +// eigenvectors can be obtained with c.Left(m, true) and c.Right(m, true) +// respectively. The canonical variables can also be obtained directly from the +// centered raw data by using the back-transformed eigenvectors which can be +// obtained with c.Left(m, false) and c.Right(m, false) respectively. +// +// The first pair of left and right eigenvectors of the canonical correlation +// matrix can be interpreted as directions into which the respective sphered +// data can be projected such that the correlation between the two projections +// is maximized. The second pair and onwards solve the same optimization but +// under the constraint that they are uncorrelated (orthogonal in sphered space) +// to previous projections. +// +// CanonicalCorrelations will panic if the inputs x and y do not have the same +// number of rows. +// +// The slice weights is used to weight the observations. If weights is nil, each +// weight is considered to have a value of one, otherwise the length of weights +// must match the number of observations (rows of both x and y) or +// CanonicalCorrelations will panic. +// +// More details can be found at +// https://en.wikipedia.org/wiki/Canonical_correlation +// or in Chapter 3 of +// Koch, Inge. Analysis of multivariate and high-dimensional data. +// Vol. 32. Cambridge University Press, 2013. ISBN: 9780521887939 +func (c *CC) CanonicalCorrelations(x, y mat.Matrix, weights []float64) error { + var yn int + c.n, c.xd = x.Dims() + yn, c.yd = y.Dims() + if c.n != yn { + panic("stat: unequal number of observations") + } + if weights != nil && len(weights) != c.n { + panic("stat: len(weights) != observations") + } + + // Center and factorize x and y. 
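+	// If either factorization fails, the receiver is marked as not ok and
+	// an error is returned.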
+ c.x, c.ok = svdFactorizeCentered(c.x, x, weights) + if !c.ok { + return errors.New("stat: failed to factorize x") + } + c.y, c.ok = svdFactorizeCentered(c.y, y, weights) + if !c.ok { + return errors.New("stat: failed to factorize y") + } + var xu, xv, yu, yv mat.Dense + c.x.UTo(&xu) + c.x.VTo(&xv) + c.y.UTo(&yu) + c.y.VTo(&yv) + + // Calculate and factorise the canonical correlation matrix. + var ccor mat.Dense + ccor.Product(&xv, xu.T(), &yu, yv.T()) + if c.c == nil { + c.c = &mat.SVD{} + } + c.ok = c.c.Factorize(&ccor, mat.SVDThin) + if !c.ok { + return errors.New("stat: failed to factorize ccor") + } + return nil +} + +// CorrsTo returns the canonical correlations, using dst if it is not nil. +// If dst is not nil and len(dst) does not match the number of columns in +// the y input matrix, Corrs will panic. +func (c *CC) CorrsTo(dst []float64) []float64 { + if !c.ok { + panic("stat: canonical correlations missing or invalid") + } + + if dst != nil && len(dst) != c.yd { + panic("stat: length of destination does not match input dimension") + } + return c.c.Values(dst) +} + +// LeftTo returns the left eigenvectors of the canonical correlation matrix if +// spheredSpace is true. If spheredSpace is false it returns these eigenvectors +// back-transformed to the original data space. +// +// If dst is empty, LeftTo will resize dst to be xd×yd. When dst is +// non-empty, LeftTo will panic if dst is not xd×yd. LeftTo will also +// panic if the receiver does not contain a successful CC. +func (c *CC) LeftTo(dst *mat.Dense, spheredSpace bool) { + if !c.ok || c.n < 2 { + panic("stat: canonical correlations missing or invalid") + } + + if dst.IsEmpty() { + dst.ReuseAs(c.xd, c.yd) + } else { + if d, n := dst.Dims(); d != c.xd || n != c.yd { + panic(mat.ErrShape) + } + } + c.c.UTo(dst) + if spheredSpace { + return + } + + xs := c.x.Values(nil) + xv := &mat.Dense{} + c.x.VTo(xv) + + scaleColsReciSqrt(xv, xs) + + dst.Product(xv, xv.T(), dst) + dst.Scale(math.Sqrt(float64(c.n-1)), dst) +} + +// RightTo returns the right eigenvectors of the canonical correlation matrix if +// spheredSpace is true. If spheredSpace is false it returns these eigenvectors +// back-transformed to the original data space. +// +// If dst is empty, RightTo will resize dst to be yd×yd. When dst is +// non-empty, RightTo will panic if dst is not yd×yd. RightTo will also +// panic if the receiver does not contain a successful CC. +func (c *CC) RightTo(dst *mat.Dense, spheredSpace bool) { + if !c.ok || c.n < 2 { + panic("stat: canonical correlations missing or invalid") + } + + if dst.IsEmpty() { + dst.ReuseAs(c.yd, c.yd) + } else { + if d, n := dst.Dims(); d != c.yd || n != c.yd { + panic(mat.ErrShape) + } + } + c.c.VTo(dst) + if spheredSpace { + return + } + + ys := c.y.Values(nil) + yv := &mat.Dense{} + c.y.VTo(yv) + + scaleColsReciSqrt(yv, ys) + + dst.Product(yv, yv.T(), dst) + dst.Scale(math.Sqrt(float64(c.n-1)), dst) +} + +func svdFactorizeCentered(work *mat.SVD, m mat.Matrix, weights []float64) (svd *mat.SVD, ok bool) { + n, d := m.Dims() + centered := mat.NewDense(n, d, nil) + col := make([]float64, n) + for j := 0; j < d; j++ { + mat.Col(col, j, m) + floats.AddConst(-Mean(col, weights), col) + centered.SetCol(j, col) + } + for i, w := range weights { + floats.Scale(math.Sqrt(w), centered.RawRowView(i)) + } + if work == nil { + work = &mat.SVD{} + } + ok = work.Factorize(centered, mat.SVDThin) + return work, ok +} + +// scaleColsReciSqrt scales the columns of cols +// by the reciprocal square-root of vals. 
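+// It panics if cols is nil or if len(vals) does not equal the number of
+// columns in cols.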
+func scaleColsReciSqrt(cols *mat.Dense, vals []float64) {
+	if cols == nil {
+		panic("stat: input nil")
+	}
+	n, d := cols.Dims()
+	if len(vals) != d {
+		panic("stat: input length mismatch")
+	}
+	col := make([]float64, n)
+	for j := 0; j < d; j++ {
+		mat.Col(col, j, cols)
+		floats.Scale(math.Sqrt(1/vals[j]), col)
+		cols.SetCol(j, col)
+	}
+}
diff --git a/vendor/gonum.org/v1/gonum/stat/roc.go b/vendor/gonum.org/v1/gonum/stat/roc.go
new file mode 100644
index 0000000000..19add6fac9
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/roc.go
@@ -0,0 +1,198 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stat
+
+import (
+	"math"
+	"slices"
+	"sort"
+)
+
+// ROC returns paired false positive rate (FPR) and true positive rate
+// (TPR) values corresponding to cutoff points on the receiver operating
+// characteristic (ROC) curve obtained when y is treated as a binary
+// classifier for classes with weights. The cutoff thresholds used to
+// calculate the ROC are returned in thresh such that tpr[i] and fpr[i]
+// are the true and false positive rates for y >= thresh[i].
+//
+// The input y and cutoffs must be sorted, and values in y must correspond
+// to values in classes and weights. SortWeightedLabeled can be used to
+// sort y together with classes and weights.
+//
+// For a given cutoff value, observations corresponding to entries in y
+// greater than the cutoff value are classified as true, while those
+// less than or equal to the cutoff value are classified as false. These
+// assigned class labels are compared with the true values in the classes
+// slice and used to calculate the FPR and TPR.
+//
+// If weights is nil, all weights are treated as 1. If weights is not nil
+// it must have the same length as y and classes, otherwise ROC will panic.
+//
+// If cutoffs is nil or empty, all possible cutoffs are calculated,
+// resulting in fpr and tpr having length one greater than the number of
+// unique values in y. Otherwise fpr and tpr will be returned with the
+// same length as cutoffs. floats.Span can be used to generate equally
+// spaced cutoffs.
+//
+// More details about ROC curves are available at
+// https://en.wikipedia.org/wiki/Receiver_operating_characteristic
+func ROC(cutoffs, y []float64, classes []bool, weights []float64) (tpr, fpr, thresh []float64) {
+	if len(y) != len(classes) {
+		panic("stat: slice length mismatch")
+	}
+	if weights != nil && len(y) != len(weights) {
+		panic("stat: slice length mismatch")
+	}
+	if !sort.Float64sAreSorted(y) {
+		panic("stat: input must be sorted ascending")
+	}
+	if !sort.Float64sAreSorted(cutoffs) {
+		panic("stat: cutoff values must be sorted ascending")
+	}
+	if len(y) == 0 {
+		return nil, nil, nil
+	}
+	if len(cutoffs) == 0 {
+		if cutoffs == nil || cap(cutoffs) < len(y)+1 {
+			cutoffs = make([]float64, len(y)+1)
+		} else {
+			cutoffs = cutoffs[:len(y)+1]
+		}
+		// Choose all possible cutoffs for unique values in y.
+		bin := 0
+		cutoffs[bin] = y[0]
+		for i, u := range y[1:] {
+			if u == y[i] {
+				continue
+			}
+			bin++
+			cutoffs[bin] = u
+		}
+		cutoffs[bin+1] = math.Inf(1)
+		cutoffs = cutoffs[:bin+2]
+	} else {
+		// Don't mutate the provided cutoffs.
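+		// The copy is needed because cutoffs is reversed in place below and
+		// returned to the caller as thresh.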
+ tmp := cutoffs + cutoffs = make([]float64, len(cutoffs)) + copy(cutoffs, tmp) + } + + tpr = make([]float64, len(cutoffs)) + fpr = make([]float64, len(cutoffs)) + var bin int + var nPos, nNeg float64 + for i, u := range classes { + // Update the bin until it matches the next y value + // skipping empty bins. + for bin < len(cutoffs)-1 && y[i] >= cutoffs[bin] { + bin++ + tpr[bin] = tpr[bin-1] + fpr[bin] = fpr[bin-1] + } + posWeight, negWeight := 1.0, 0.0 + if weights != nil { + posWeight = weights[i] + } + if !u { + posWeight, negWeight = negWeight, posWeight + } + nPos += posWeight + nNeg += negWeight + // Count false negatives (in tpr) and true negatives (in fpr). + if y[i] < cutoffs[bin] { + tpr[bin] += posWeight + fpr[bin] += negWeight + } + } + + invNeg := 1 / nNeg + invPos := 1 / nPos + // Convert negative counts to TPR and FPR. + // Bins beyond the maximum value in y are skipped + // leaving these fpr and tpr elements as zero. + for i := range tpr[:bin+1] { + // Prevent fused float operations by + // making explicit float64 conversions. + tpr[i] = 1 - float64(tpr[i]*invPos) + fpr[i] = 1 - float64(fpr[i]*invNeg) + } + slices.Reverse(tpr) + slices.Reverse(fpr) + slices.Reverse(cutoffs) + + return tpr, fpr, cutoffs +} + +// TOC returns the Total Operating Characteristic for the classes provided +// and the minimum and maximum bounds for the TOC. +// +// The input y values that correspond to classes and weights must be sorted +// in ascending order. classes[i] is the class of value y[i] and weights[i] +// is the weight of y[i]. SortWeightedLabeled can be used to sort classes +// together with weights by the rank variable, i+1. +// +// The returned ntp values can be interpreted as the number of true positives +// where values above the given rank are assigned class true for each given +// rank from 1 to len(classes). +// +// ntp_i = sum_{j ≥ len(ntp)-1 - i} [ classes_j ] * weights_j, where [x] = 1 if x else 0. +// +// The values of min and max provide the minimum and maximum possible number +// of false values for the set of classes. The first element of ntp, min and +// max are always zero as this corresponds to assigning all data class false +// and the last elements are always weighted sum of classes as this corresponds +// to assigning every data class true. For len(classes) != 0, the lengths of +// min, ntp and max are len(classes)+1. +// +// If weights is nil, all weights are treated as 1. When weights are not nil, +// the calculation of min and max allows for partial assignment of single data +// points. If weights is not nil it must have the same length as classes, +// otherwise TOC will panic. +// +// More details about TOC curves are available at +// https://en.wikipedia.org/wiki/Total_operating_characteristic +func TOC(classes []bool, weights []float64) (min, ntp, max []float64) { + if weights != nil && len(classes) != len(weights) { + panic("stat: slice length mismatch") + } + if len(classes) == 0 { + return nil, nil, nil + } + + ntp = make([]float64, len(classes)+1) + min = make([]float64, len(ntp)) + max = make([]float64, len(ntp)) + if weights == nil { + for i := range ntp[1:] { + ntp[i+1] = ntp[i] + if classes[len(classes)-i-1] { + ntp[i+1]++ + } + } + totalPositive := ntp[len(ntp)-1] + for i := range ntp { + min[i] = math.Max(0, totalPositive-float64(len(classes)-i)) + max[i] = math.Min(totalPositive, float64(i)) + } + return min, ntp, max + } + + cumw := max // Reuse max for cumulative weight. Update its elements last. 
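+	// Aliasing max is safe: in the final loop each cumw[i] is read before
+	// the same element is overwritten through max[i].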
+	for i := range ntp[1:] {
+		ntp[i+1] = ntp[i]
+		w := weights[len(weights)-i-1]
+		cumw[i+1] = cumw[i] + w
+		if classes[len(classes)-i-1] {
+			ntp[i+1] += w
+		}
+	}
+	totw := cumw[len(cumw)-1]
+	totalPositive := ntp[len(ntp)-1]
+	for i := range ntp {
+		min[i] = math.Max(0, totalPositive-(totw-cumw[i]))
+		max[i] = math.Min(totalPositive, cumw[i])
+	}
+	return min, ntp, max
+}
diff --git a/vendor/gonum.org/v1/gonum/stat/stat.go b/vendor/gonum.org/v1/gonum/stat/stat.go
new file mode 100644
index 0000000000..f7d43726f7
--- /dev/null
+++ b/vendor/gonum.org/v1/gonum/stat/stat.go
@@ -0,0 +1,1400 @@
+// Copyright ©2014 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stat
+
+import (
+	"math"
+	"sort"
+
+	"gonum.org/v1/gonum/floats"
+)
+
+// CumulantKind specifies the behavior for calculating the empirical CDF or Quantile.
+type CumulantKind int
+
+// List of supported CumulantKind values for the Quantile function.
+// Constant values should match the R nomenclature. See
+// https://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population
+const (
+	// Empirical treats the distribution as the actual empirical distribution.
+	Empirical CumulantKind = 1
+	// LinInterp linearly interpolates the empirical distribution between sample values, with a flat extrapolation.
+	LinInterp CumulantKind = 4
+)
+
+// bhattacharyyaCoeff computes the Bhattacharyya Coefficient for probability distributions given by:
+//
+//	\sum_i \sqrt{p_i q_i}
+//
+// It is assumed that p and q have equal length.
+func bhattacharyyaCoeff(p, q []float64) float64 {
+	var bc float64
+	for i, a := range p {
+		bc += math.Sqrt(a * q[i])
+	}
+	return bc
+}
+
+// Bhattacharyya computes the distance between the probability distributions p and q given by:
+//
+//	-\ln ( \sum_i \sqrt{p_i q_i} )
+//
+// The lengths of p and q must be equal. It is assumed that p and q sum to 1.
+func Bhattacharyya(p, q []float64) float64 {
+	if len(p) != len(q) {
+		panic("stat: slice length mismatch")
+	}
+	bc := bhattacharyyaCoeff(p, q)
+	return -math.Log(bc)
+}
+
+// CDF returns the empirical cumulative distribution function value of q, that is
+// the fraction of the samples less than or equal to q. The
+// exact behavior is determined by the CumulantKind. CDF is theoretically
+// the inverse of the Quantile function, though it may not be the actual inverse
+// for all values q and CumulantKinds.
+//
+// The x data must be sorted in increasing order. If weights is nil then all
+// of the weights are 1. If weights is not nil, then len(x) must equal len(weights).
+// CDF will panic if the length of x is zero.
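+// CDF will also panic if the x data are not sorted or the CumulantKind is
+// not supported.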
+// +// CumulantKind behaviors: +// - Empirical: Returns the lowest fraction for which q is greater than or equal +// to that fraction of samples +func CDF(q float64, c CumulantKind, x, weights []float64) float64 { + if weights != nil && len(x) != len(weights) { + panic("stat: slice length mismatch") + } + if floats.HasNaN(x) { + return math.NaN() + } + if len(x) == 0 { + panic("stat: zero length slice") + } + if !sort.Float64sAreSorted(x) { + panic("x data are not sorted") + } + + if q < x[0] { + return 0 + } + if q >= x[len(x)-1] { + return 1 + } + + var sumWeights float64 + if weights == nil { + sumWeights = float64(len(x)) + } else { + sumWeights = floats.Sum(weights) + } + + // Calculate the index + switch c { + case Empirical: + // Find the smallest value that is greater than that percent of the samples + var w float64 + for i, v := range x { + if v > q { + return w / sumWeights + } + if weights == nil { + w++ + } else { + w += weights[i] + } + } + panic("impossible") + default: + panic("stat: bad cumulant kind") + } +} + +// ChiSquare computes the chi-square distance between the observed frequencies 'obs' and +// expected frequencies 'exp' given by: +// +// \sum_i (obs_i-exp_i)^2 / exp_i +// +// The lengths of obs and exp must be equal. +func ChiSquare(obs, exp []float64) float64 { + if len(obs) != len(exp) { + panic("stat: slice length mismatch") + } + var result float64 + for i, a := range obs { + b := exp[i] + if a == 0 && b == 0 { + continue + } + result += (a - b) * (a - b) / b + } + return result +} + +// CircularMean returns the circular mean of the dataset. +// +// atan2(\sum_i w_i * sin(alpha_i), \sum_i w_i * cos(alpha_i)) +// +// If weights is nil then all of the weights are 1. If weights is not nil, then +// len(x) must equal len(weights). +func CircularMean(x, weights []float64) float64 { + if weights != nil && len(x) != len(weights) { + panic("stat: slice length mismatch") + } + + var aX, aY float64 + if weights != nil { + for i, v := range x { + aX += weights[i] * math.Cos(v) + aY += weights[i] * math.Sin(v) + } + } else { + for _, v := range x { + aX += math.Cos(v) + aY += math.Sin(v) + } + } + + return math.Atan2(aY, aX) +} + +// Correlation returns the weighted correlation between the samples of x and y +// with the given means. +// +// sum_i {w_i (x_i - meanX) * (y_i - meanY)} / (stdX * stdY) +// +// The lengths of x and y must be equal. If weights is nil then all of the +// weights are 1. If weights is not nil, then len(x) must equal len(weights). +func Correlation(x, y, weights []float64) float64 { + // This is a two-pass corrected implementation. It is an adaptation of the + // algorithm used in the MeanVariance function, which applies a correction + // to the typical two pass approach. + + if len(x) != len(y) { + panic("stat: slice length mismatch") + } + xu := Mean(x, weights) + yu := Mean(y, weights) + var ( + sxx float64 + syy float64 + sxy float64 + xcompensation float64 + ycompensation float64 + ) + if weights == nil { + for i, xv := range x { + yv := y[i] + xd := xv - xu + yd := yv - yu + sxx += xd * xd + syy += yd * yd + sxy += xd * yd + xcompensation += xd + ycompensation += yd + } + // xcompensation and ycompensation are from Chan, et. al. + // referenced in the MeanVariance function. They are analogous + // to the second term in (1.7) in that paper. 
+ sxx -= xcompensation * xcompensation / float64(len(x)) + syy -= ycompensation * ycompensation / float64(len(x)) + + return (sxy - xcompensation*ycompensation/float64(len(x))) / math.Sqrt(sxx*syy) + + } + + var sumWeights float64 + for i, xv := range x { + w := weights[i] + yv := y[i] + xd := xv - xu + wxd := w * xd + yd := yv - yu + wyd := w * yd + sxx += wxd * xd + syy += wyd * yd + sxy += wxd * yd + xcompensation += wxd + ycompensation += wyd + sumWeights += w + } + // xcompensation and ycompensation are from Chan, et. al. + // referenced in the MeanVariance function. They are analogous + // to the second term in (1.7) in that paper, except they use + // the sumWeights instead of the sample count. + sxx -= xcompensation * xcompensation / sumWeights + syy -= ycompensation * ycompensation / sumWeights + + return (sxy - xcompensation*ycompensation/sumWeights) / math.Sqrt(sxx*syy) +} + +// Kendall returns the weighted Tau-a Kendall correlation between the +// samples of x and y. The Kendall correlation measures the quantity of +// concordant and discordant pairs of numbers. If weights are specified then +// each pair is weighted by weights[i] * weights[j] and the final sum is +// normalized to stay between -1 and 1. +// The lengths of x and y must be equal. If weights is nil then all of the +// weights are 1. If weights is not nil, then len(x) must equal len(weights). +func Kendall(x, y, weights []float64) float64 { + if len(x) != len(y) { + panic("stat: slice length mismatch") + } + + var ( + cc float64 // number of concordant pairs + dc float64 // number of discordant pairs + n = len(x) + ) + + if weights == nil { + for i := 0; i < n; i++ { + for j := i; j < n; j++ { + if i == j { + continue + } + if math.Signbit(x[j]-x[i]) == math.Signbit(y[j]-y[i]) { + cc++ + } else { + dc++ + } + } + } + return (cc - dc) / float64(n*(n-1)/2) + } + + var sumWeights float64 + + for i := 0; i < n; i++ { + for j := i; j < n; j++ { + if i == j { + continue + } + weight := weights[i] * weights[j] + if math.Signbit(x[j]-x[i]) == math.Signbit(y[j]-y[i]) { + cc += weight + } else { + dc += weight + } + sumWeights += weight + } + } + return float64(cc-dc) / sumWeights +} + +// Covariance returns the weighted covariance between the samples of x and y. +// +// sum_i {w_i (x_i - meanX) * (y_i - meanY)} / (sum_j {w_j} - 1) +// +// The lengths of x and y must be equal. If weights is nil then all of the +// weights are 1. If weights is not nil, then len(x) must equal len(weights). +func Covariance(x, y, weights []float64) float64 { + // This is a two-pass corrected implementation. It is an adaptation of the + // algorithm used in the MeanVariance function, which applies a correction + // to the typical two pass approach. + + if len(x) != len(y) { + panic("stat: slice length mismatch") + } + xu := Mean(x, weights) + yu := Mean(y, weights) + return covarianceMeans(x, y, weights, xu, yu) +} + +// covarianceMeans returns the weighted covariance between x and y with the mean +// of x and y already specified. See the documentation of Covariance for more +// information. +func covarianceMeans(x, y, weights []float64, xu, yu float64) float64 { + var ( + ss float64 + xcompensation float64 + ycompensation float64 + ) + if weights == nil { + for i, xv := range x { + yv := y[i] + xd := xv - xu + yd := yv - yu + ss += xd * yd + xcompensation += xd + ycompensation += yd + } + // xcompensation and ycompensation are from Chan, et. al. + // referenced in the MeanVariance function. 
They are analogous + // to the second term in (1.7) in that paper. + return (ss - xcompensation*ycompensation/float64(len(x))) / float64(len(x)-1) + } + + var sumWeights float64 + + for i, xv := range x { + w := weights[i] + yv := y[i] + wxd := w * (xv - xu) + yd := (yv - yu) + ss += wxd * yd + xcompensation += wxd + ycompensation += w * yd + sumWeights += w + } + // xcompensation and ycompensation are from Chan, et. al. + // referenced in the MeanVariance function. They are analogous + // to the second term in (1.7) in that paper, except they use + // the sumWeights instead of the sample count. + return (ss - xcompensation*ycompensation/sumWeights) / (sumWeights - 1) +} + +// CrossEntropy computes the cross-entropy between the two distributions specified +// in p and q. +func CrossEntropy(p, q []float64) float64 { + if len(p) != len(q) { + panic("stat: slice length mismatch") + } + var ce float64 + for i, v := range p { + if v != 0 { + ce -= v * math.Log(q[i]) + } + } + return ce +} + +// Entropy computes the Shannon entropy of a distribution or the distance between +// two distributions. The natural logarithm is used. +// - sum_i (p_i * log_e(p_i)) +func Entropy(p []float64) float64 { + var e float64 + for _, v := range p { + if v != 0 { // Entropy needs 0 * log(0) == 0. + e -= v * math.Log(v) + } + } + return e +} + +// ExKurtosis returns the population excess kurtosis of the sample. +// The kurtosis is defined by the 4th moment of the mean divided by the squared +// variance. The excess kurtosis subtracts 3.0 so that the excess kurtosis of +// the normal distribution is zero. +// If weights is nil then all of the weights are 1. If weights is not nil, then +// len(x) must equal len(weights). +func ExKurtosis(x, weights []float64) float64 { + mean, std := MeanStdDev(x, weights) + if weights == nil { + var e float64 + for _, v := range x { + z := (v - mean) / std + e += z * z * z * z + } + mul, offset := kurtosisCorrection(float64(len(x))) + return e*mul - offset + } + + var ( + e float64 + sumWeights float64 + ) + for i, v := range x { + z := (v - mean) / std + e += weights[i] * z * z * z * z + sumWeights += weights[i] + } + mul, offset := kurtosisCorrection(sumWeights) + return e*mul - offset +} + +// n is the number of samples +// see https://en.wikipedia.org/wiki/Kurtosis +func kurtosisCorrection(n float64) (mul, offset float64) { + return ((n + 1) / (n - 1)) * (n / (n - 2)) * (1 / (n - 3)), 3 * ((n - 1) / (n - 2)) * ((n - 1) / (n - 3)) +} + +// GeometricMean returns the weighted geometric mean of the dataset +// +// \prod_i {x_i ^ w_i} +// +// This only applies with positive x and positive weights. If weights is nil +// then all of the weights are 1. If weights is not nil, then len(x) must equal +// len(weights). +func GeometricMean(x, weights []float64) float64 { + if weights == nil { + var s float64 + for _, v := range x { + s += math.Log(v) + } + s /= float64(len(x)) + return math.Exp(s) + } + if len(x) != len(weights) { + panic("stat: slice length mismatch") + } + var ( + s float64 + sumWeights float64 + ) + for i, v := range x { + s += weights[i] * math.Log(v) + sumWeights += weights[i] + } + s /= sumWeights + return math.Exp(s) +} + +// HarmonicMean returns the weighted harmonic mean of the dataset +// +// \sum_i {w_i} / ( sum_i {w_i / x_i} ) +// +// This only applies with positive x and positive weights. +// If weights is nil then all of the weights are 1. If weights is not nil, then +// len(x) must equal len(weights). 
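+//
+// For example, HarmonicMean([]float64{1, 2, 4}, nil) is
+// 3/(1/1+1/2+1/4) ≈ 1.714.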
+func HarmonicMean(x, weights []float64) float64 { + if weights != nil && len(x) != len(weights) { + panic("stat: slice length mismatch") + } + // TODO(btracey): Fix this to make it more efficient and avoid allocation. + + // This can be numerically unstable (for example if x is very small). + // W = \sum_i {w_i} + // hm = exp(log(W) - log(\sum_i w_i / x_i)) + + logs := make([]float64, len(x)) + var W float64 + for i := range x { + if weights == nil { + logs[i] = -math.Log(x[i]) + W++ + continue + } + logs[i] = math.Log(weights[i]) - math.Log(x[i]) + W += weights[i] + } + + // Sum all of the logs + v := floats.LogSumExp(logs) // This computes log(\sum_i { w_i / x_i}). + return math.Exp(math.Log(W) - v) +} + +// Hellinger computes the distance between the probability distributions p and q given by: +// +// \sqrt{ 1 - \sum_i \sqrt{p_i q_i} } +// +// The lengths of p and q must be equal. It is assumed that p and q sum to 1. +func Hellinger(p, q []float64) float64 { + if len(p) != len(q) { + panic("stat: slice length mismatch") + } + bc := bhattacharyyaCoeff(p, q) + return math.Sqrt(1 - bc) +} + +// Histogram sums up the weighted number of data points in each bin. +// The weight of data point x[i] will be placed into count[j] if +// dividers[j] <= x < dividers[j+1]. The "span" function in the floats package can assist +// with bin creation. +// +// The following conditions on the inputs apply: +// - The count variable must either be nil or have length of one less than dividers. +// - The values in dividers must be sorted (use the sort package). +// - The x values must be sorted. +// - If weights is nil then all of the weights are 1. +// - If weights is not nil, then len(x) must equal len(weights). +func Histogram(count, dividers, x, weights []float64) []float64 { + if weights != nil && len(x) != len(weights) { + panic("stat: slice length mismatch") + } + if count == nil { + count = make([]float64, len(dividers)-1) + } + if len(dividers) < 2 { + panic("histogram: fewer than two dividers") + } + if len(count) != len(dividers)-1 { + panic("histogram: bin count mismatch") + } + if !sort.Float64sAreSorted(dividers) { + panic("histogram: dividers are not sorted") + } + if !sort.Float64sAreSorted(x) { + panic("histogram: x data are not sorted") + } + for i := range count { + count[i] = 0 + } + if len(x) == 0 { + return count + } + if x[0] < dividers[0] { + panic("histogram: minimum x value is less than lowest divider") + } + if dividers[len(dividers)-1] <= x[len(x)-1] { + panic("histogram: maximum x value is greater than or equal to highest divider") + } + + idx := 0 + comp := dividers[idx+1] + if weights == nil { + for _, v := range x { + if v < comp { + // Still in the current bucket. + count[idx]++ + continue + } + // Find the next divider where v is less than the divider. + for j := idx + 1; j < len(dividers); j++ { + if v < dividers[j+1] { + idx = j + comp = dividers[j+1] + break + } + } + count[idx]++ + } + return count + } + + for i, v := range x { + if v < comp { + // Still in the current bucket. + count[idx] += weights[i] + continue + } + // Need to find the next divider where v is less than the divider. + for j := idx + 1; j < len(count); j++ { + if v < dividers[j+1] { + idx = j + comp = dividers[j+1] + break + } + } + count[idx] += weights[i] + } + return count +} + +// JensenShannon computes the JensenShannon divergence between the distributions +// p and q. 
The Jensen-Shannon divergence is defined as +// +// m = 0.5 * (p + q) +// JS(p, q) = 0.5 ( KL(p, m) + KL(q, m) ) +// +// Unlike Kullback-Leibler, the Jensen-Shannon distance is symmetric. The value +// is between 0 and ln(2). +func JensenShannon(p, q []float64) float64 { + if len(p) != len(q) { + panic("stat: slice length mismatch") + } + var js float64 + for i, v := range p { + qi := q[i] + m := 0.5 * (v + qi) + if v != 0 { + // add kl from p to m + js += 0.5 * v * (math.Log(v) - math.Log(m)) + } + if qi != 0 { + // add kl from q to m + js += 0.5 * qi * (math.Log(qi) - math.Log(m)) + } + } + return js +} + +// KolmogorovSmirnov computes the largest distance between two empirical CDFs. +// Each dataset x and y consists of sample locations and counts, xWeights and +// yWeights, respectively. +// +// x and y may have different lengths, though len(x) must equal len(xWeights), and +// len(y) must equal len(yWeights). Both x and y must be sorted. +// +// Special cases are: +// +// = 0 if len(x) == len(y) == 0 +// = 1 if len(x) == 0, len(y) != 0 or len(x) != 0 and len(y) == 0 +func KolmogorovSmirnov(x, xWeights, y, yWeights []float64) float64 { + if xWeights != nil && len(x) != len(xWeights) { + panic("stat: slice length mismatch") + } + if yWeights != nil && len(y) != len(yWeights) { + panic("stat: slice length mismatch") + } + if len(x) == 0 || len(y) == 0 { + if len(x) == 0 && len(y) == 0 { + return 0 + } + return 1 + } + + if floats.HasNaN(x) { + return math.NaN() + } + if floats.HasNaN(y) { + return math.NaN() + } + + if !sort.Float64sAreSorted(x) { + panic("x data are not sorted") + } + if !sort.Float64sAreSorted(y) { + panic("y data are not sorted") + } + + xWeightsNil := xWeights == nil + yWeightsNil := yWeights == nil + + var ( + maxDist float64 + xSum, ySum float64 + xCdf, yCdf float64 + xIdx, yIdx int + ) + + if xWeightsNil { + xSum = float64(len(x)) + } else { + xSum = floats.Sum(xWeights) + } + + if yWeightsNil { + ySum = float64(len(y)) + } else { + ySum = floats.Sum(yWeights) + } + + xVal := x[0] + yVal := y[0] + + // Algorithm description: + // The goal is to find the maximum difference in the empirical CDFs for the + // two datasets. The CDFs are piecewise-constant, and thus the distance + // between the CDFs will only change at the values themselves. + // + // To find the maximum distance, step through the data in ascending order + // of value between the two datasets. At each step, compute the empirical CDF + // and compare the local distance with the maximum distance. + // Due to some corner cases, equal data entries must be tallied simultaneously. + for { + switch { + case xVal < yVal: + xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil) + case yVal < xVal: + yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil) + case xVal == yVal: + newX := x[xIdx] + newY := y[yIdx] + if newX < newY { + xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil) + } else if newY < newX { + yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil) + } else { + // Update them both, they'll be equal next time and the right + // thing will happen. 
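+				// Advancing both indices still makes progress, so the
+				// enclosing loop is guaranteed to terminate.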
+ xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil) + yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil) + } + default: + panic("unreachable") + } + + dist := math.Abs(xCdf - yCdf) + if dist > maxDist { + maxDist = dist + } + + // Both xCdf and yCdf will equal 1 at the end, so if we have reached the + // end of either sample list, the distance is as large as it can be. + if xIdx == len(x) || yIdx == len(y) { + return maxDist + } + } +} + +// updateKS gets the next data point from one of the set. In doing so, it combines +// the weight of all the data points of equal value. Upon return, val is the new +// value of the data set, newCdf is the total combined CDF up until this point, +// and newIdx is the index of the next location in that sample to examine. +func updateKS(idx int, cdf, sum float64, values, weights []float64, isNil bool) (val, newCdf float64, newIdx int) { + // Sum up all the weights of consecutive values that are equal. + if isNil { + newCdf = cdf + 1/sum + } else { + newCdf = cdf + weights[idx]/sum + } + newIdx = idx + 1 + for { + if newIdx == len(values) { + return values[newIdx-1], newCdf, newIdx + } + if values[newIdx-1] != values[newIdx] { + return values[newIdx], newCdf, newIdx + } + if isNil { + newCdf += 1 / sum + } else { + newCdf += weights[newIdx] / sum + } + newIdx++ + } +} + +// KullbackLeibler computes the Kullback-Leibler distance between the +// distributions p and q. The natural logarithm is used. +// +// sum_i(p_i * log(p_i / q_i)) +// +// Note that the Kullback-Leibler distance is not symmetric; +// KullbackLeibler(p,q) != KullbackLeibler(q,p) +func KullbackLeibler(p, q []float64) float64 { + if len(p) != len(q) { + panic("stat: slice length mismatch") + } + var kl float64 + for i, v := range p { + if v != 0 { // Entropy needs 0 * log(0) == 0. + kl += v * (math.Log(v) - math.Log(q[i])) + } + } + return kl +} + +// LinearRegression computes the best-fit line +// +// y = alpha + beta*x +// +// to the data in x and y with the given weights. If origin is true, the +// regression is forced to pass through the origin. +// +// Specifically, LinearRegression computes the values of alpha and +// beta such that the total residual +// +// \sum_i w[i]*(y[i] - alpha - beta*x[i])^2 +// +// is minimized. If origin is true, then alpha is forced to be zero. +// +// The lengths of x and y must be equal. If weights is nil then all of the +// weights are 1. If weights is not nil, then len(x) must equal len(weights). +func LinearRegression(x, y, weights []float64, origin bool) (alpha, beta float64) { + if len(x) != len(y) { + panic("stat: slice length mismatch") + } + if weights != nil && len(weights) != len(x) { + panic("stat: slice length mismatch") + } + + w := 1.0 + if origin { + var x2Sum, xySum float64 + for i, xi := range x { + if weights != nil { + w = weights[i] + } + yi := y[i] + xySum += w * xi * yi + x2Sum += w * xi * xi + } + beta = xySum / x2Sum + + return 0, beta + } + + xu, xv := MeanVariance(x, weights) + yu := Mean(y, weights) + cov := covarianceMeans(x, y, weights, xu, yu) + beta = cov / xv + alpha = yu - beta*xu + return alpha, beta +} + +// RSquared returns the coefficient of determination defined as +// +// R^2 = 1 - \sum_i w[i]*(y[i] - alpha - beta*x[i])^2 / \sum_i w[i]*(y[i] - mean(y))^2 +// +// for the line +// +// y = alpha + beta*x +// +// and the data in x and y with the given weights. +// +// The lengths of x and y must be equal. If weights is nil then all of the +// weights are 1. 
If weights is not nil, then len(x) must equal len(weights). +func RSquared(x, y, weights []float64, alpha, beta float64) float64 { + if len(x) != len(y) { + panic("stat: slice length mismatch") + } + if weights != nil && len(weights) != len(x) { + panic("stat: slice length mismatch") + } + + w := 1.0 + yMean := Mean(y, weights) + var res, tot, d float64 + for i, xi := range x { + if weights != nil { + w = weights[i] + } + yi := y[i] + fi := alpha + beta*xi + d = yi - fi + res += w * d * d + d = yi - yMean + tot += w * d * d + } + return 1 - res/tot +} + +// RSquaredFrom returns the coefficient of determination defined as +// +// R^2 = 1 - \sum_i w[i]*(estimate[i] - value[i])^2 / \sum_i w[i]*(value[i] - mean(values))^2 +// +// and the data in estimates and values with the given weights. +// +// The lengths of estimates and values must be equal. If weights is nil then +// all of the weights are 1. If weights is not nil, then len(values) must +// equal len(weights). +func RSquaredFrom(estimates, values, weights []float64) float64 { + if len(estimates) != len(values) { + panic("stat: slice length mismatch") + } + if weights != nil && len(weights) != len(values) { + panic("stat: slice length mismatch") + } + + w := 1.0 + mean := Mean(values, weights) + var res, tot, d float64 + for i, val := range values { + if weights != nil { + w = weights[i] + } + d = val - estimates[i] + res += w * d * d + d = val - mean + tot += w * d * d + } + return 1 - res/tot +} + +// RNoughtSquared returns the coefficient of determination defined as +// +// R₀^2 = \sum_i w[i]*(beta*x[i])^2 / \sum_i w[i]*y[i]^2 +// +// for the line +// +// y = beta*x +// +// and the data in x and y with the given weights. RNoughtSquared should +// only be used for best-fit lines regressed through the origin. +// +// The lengths of x and y must be equal. If weights is nil then all of the +// weights are 1. If weights is not nil, then len(x) must equal len(weights). +func RNoughtSquared(x, y, weights []float64, beta float64) float64 { + if len(x) != len(y) { + panic("stat: slice length mismatch") + } + if weights != nil && len(weights) != len(x) { + panic("stat: slice length mismatch") + } + + w := 1.0 + var ssr, tot float64 + for i, xi := range x { + if weights != nil { + w = weights[i] + } + fi := beta * xi + ssr += w * fi * fi + yi := y[i] + tot += w * yi * yi + } + return ssr / tot +} + +// Mean computes the weighted mean of the data set. +// +// sum_i {w_i * x_i} / sum_i {w_i} +// +// If weights is nil then all of the weights are 1. If weights is not nil, then +// len(x) must equal len(weights). +func Mean(x, weights []float64) float64 { + if weights == nil { + return floats.Sum(x) / float64(len(x)) + } + if len(x) != len(weights) { + panic("stat: slice length mismatch") + } + var ( + sumValues float64 + sumWeights float64 + ) + for i, w := range weights { + sumValues += w * x[i] + sumWeights += w + } + return sumValues / sumWeights +} + +// Mode returns the most common value in the dataset specified by x and the +// given weights. Strict float64 equality is used when comparing values, so users +// should take caution. If several values are the mode, any of them may be returned. 
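+//
+// For example, Mode([]float64{1, 2, 2, 3}, nil) returns (2, 2).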
+func Mode(x, weights []float64) (val float64, count float64) { + if weights != nil && len(x) != len(weights) { + panic("stat: slice length mismatch") + } + if len(x) == 0 { + return 0, 0 + } + m := make(map[float64]float64) + if weights == nil { + for _, v := range x { + m[v]++ + } + } else { + for i, v := range x { + m[v] += weights[i] + } + } + var ( + maxCount float64 + max float64 + ) + for val, count := range m { + if count > maxCount { + maxCount = count + max = val + } + } + return max, maxCount +} + +// BivariateMoment computes the weighted mixed moment between the samples x and y. +// +// E[(x - μ_x)^r*(y - μ_y)^s] +// +// No degrees of freedom correction is done. +// The lengths of x and y must be equal. If weights is nil then all of the +// weights are 1. If weights is not nil, then len(x) must equal len(weights). +func BivariateMoment(r, s float64, x, y, weights []float64) float64 { + meanX := Mean(x, weights) + meanY := Mean(y, weights) + if len(x) != len(y) { + panic("stat: slice length mismatch") + } + if weights == nil { + var m float64 + for i, vx := range x { + vy := y[i] + m += math.Pow(vx-meanX, r) * math.Pow(vy-meanY, s) + } + return m / float64(len(x)) + } + if len(weights) != len(x) { + panic("stat: slice length mismatch") + } + var ( + m float64 + sumWeights float64 + ) + for i, vx := range x { + vy := y[i] + w := weights[i] + m += w * math.Pow(vx-meanX, r) * math.Pow(vy-meanY, s) + sumWeights += w + } + return m / sumWeights +} + +// Moment computes the weighted n^th moment of the samples, +// +// E[(x - μ)^N] +// +// No degrees of freedom correction is done. +// If weights is nil then all of the weights are 1. If weights is not nil, then +// len(x) must equal len(weights). +func Moment(moment float64, x, weights []float64) float64 { + // This also checks that x and weights have the same length. + mean := Mean(x, weights) + if weights == nil { + var m float64 + for _, v := range x { + m += math.Pow(v-mean, moment) + } + return m / float64(len(x)) + } + var ( + m float64 + sumWeights float64 + ) + for i, v := range x { + w := weights[i] + m += w * math.Pow(v-mean, moment) + sumWeights += w + } + return m / sumWeights +} + +// MomentAbout computes the weighted n^th weighted moment of the samples about +// the given mean \mu, +// +// E[(x - μ)^N] +// +// No degrees of freedom correction is done. +// If weights is nil then all of the weights are 1. If weights is not nil, then +// len(x) must equal len(weights). +func MomentAbout(moment float64, x []float64, mean float64, weights []float64) float64 { + if weights == nil { + var m float64 + for _, v := range x { + m += math.Pow(v-mean, moment) + } + m /= float64(len(x)) + return m + } + if len(weights) != len(x) { + panic("stat: slice length mismatch") + } + var ( + m float64 + sumWeights float64 + ) + for i, v := range x { + m += weights[i] * math.Pow(v-mean, moment) + sumWeights += weights[i] + } + return m / sumWeights +} + +// Quantile returns the sample of x such that x is greater than or +// equal to the fraction p of samples. The exact behavior is determined by the +// CumulantKind, and p should be a number between 0 and 1. Quantile is theoretically +// the inverse of the CDF function, though it may not be the actual inverse +// for all values p and CumulantKinds. +// +// The x data must be sorted in increasing order. If weights is nil then all +// of the weights are 1. If weights is not nil, then len(x) must equal len(weights). +// Quantile will panic if the length of x is zero. 
+// +// CumulantKind behaviors: +// - Empirical: Returns the lowest value q for which q is greater than or equal +// to the fraction p of samples +// - LinInterp: Returns the linearly interpolated value +func Quantile(p float64, c CumulantKind, x, weights []float64) float64 { + if !(p >= 0 && p <= 1) { + panic("stat: percentile out of bounds") + } + + if weights != nil && len(x) != len(weights) { + panic("stat: slice length mismatch") + } + if len(x) == 0 { + panic("stat: zero length slice") + } + if floats.HasNaN(x) { + return math.NaN() // This is needed because the algorithm breaks otherwise. + } + if !sort.Float64sAreSorted(x) { + panic("x data are not sorted") + } + + var sumWeights float64 + if weights == nil { + sumWeights = float64(len(x)) + } else { + sumWeights = floats.Sum(weights) + } + switch c { + case Empirical: + return empiricalQuantile(p, x, weights, sumWeights) + case LinInterp: + return linInterpQuantile(p, x, weights, sumWeights) + default: + panic("stat: bad cumulant kind") + } +} + +func empiricalQuantile(p float64, x, weights []float64, sumWeights float64) float64 { + var cumsum float64 + fidx := p * sumWeights + for i := range x { + if weights == nil { + cumsum++ + } else { + cumsum += weights[i] + } + if cumsum >= fidx { + return x[i] + } + } + panic("impossible") +} + +func linInterpQuantile(p float64, x, weights []float64, sumWeights float64) float64 { + var cumsum float64 + fidx := p * sumWeights + for i := range x { + if weights == nil { + cumsum++ + } else { + cumsum += weights[i] + } + if cumsum >= fidx { + if i == 0 { + return x[0] + } + t := cumsum - fidx + if weights != nil { + t /= weights[i] + } + return t*x[i-1] + (1-t)*x[i] + } + } + panic("impossible") +} + +// Skew computes the skewness of the sample data. +// If weights is nil then all of the weights are 1. If weights is not nil, then +// len(x) must equal len(weights). +// When weights sum to 1 or less, a biased variance estimator should be used. +func Skew(x, weights []float64) float64 { + + mean, std := MeanStdDev(x, weights) + if weights == nil { + var s float64 + for _, v := range x { + z := (v - mean) / std + s += z * z * z + } + return s * skewCorrection(float64(len(x))) + } + var ( + s float64 + sumWeights float64 + ) + for i, v := range x { + z := (v - mean) / std + s += weights[i] * z * z * z + sumWeights += weights[i] + } + return s * skewCorrection(sumWeights) +} + +// From: http://www.amstat.org/publications/jse/v19n2/doane.pdf page 7 +func skewCorrection(n float64) float64 { + return (n / (n - 1)) * (1 / (n - 2)) +} + +// SortWeighted rearranges the data in x along with their corresponding +// weights so that the x data are sorted. The data is sorted in place. +// Weights may be nil, but if weights is non-nil then it must have the same +// length as x. +func SortWeighted(x, weights []float64) { + if weights == nil { + sort.Float64s(x) + return + } + if len(x) != len(weights) { + panic("stat: slice length mismatch") + } + sort.Sort(weightSorter{ + x: x, + w: weights, + }) +} + +type weightSorter struct { + x []float64 + w []float64 +} + +func (w weightSorter) Len() int { return len(w.x) } +func (w weightSorter) Less(i, j int) bool { return w.x[i] < w.x[j] } +func (w weightSorter) Swap(i, j int) { + w.x[i], w.x[j] = w.x[j], w.x[i] + w.w[i], w.w[j] = w.w[j], w.w[i] +} + +// SortWeightedLabeled rearranges the data in x along with their +// corresponding weights and boolean labels so that the x data are sorted. +// The data is sorted in place. 
Weights and labels may be nil, if either +// is non-nil it must have the same length as x. +func SortWeightedLabeled(x []float64, labels []bool, weights []float64) { + if labels == nil { + SortWeighted(x, weights) + return + } + if weights == nil { + if len(x) != len(labels) { + panic("stat: slice length mismatch") + } + sort.Sort(labelSorter{ + x: x, + l: labels, + }) + return + } + if len(x) != len(labels) || len(x) != len(weights) { + panic("stat: slice length mismatch") + } + sort.Sort(weightLabelSorter{ + x: x, + l: labels, + w: weights, + }) +} + +type labelSorter struct { + x []float64 + l []bool +} + +func (a labelSorter) Len() int { return len(a.x) } +func (a labelSorter) Less(i, j int) bool { return a.x[i] < a.x[j] } +func (a labelSorter) Swap(i, j int) { + a.x[i], a.x[j] = a.x[j], a.x[i] + a.l[i], a.l[j] = a.l[j], a.l[i] +} + +type weightLabelSorter struct { + x []float64 + l []bool + w []float64 +} + +func (a weightLabelSorter) Len() int { return len(a.x) } +func (a weightLabelSorter) Less(i, j int) bool { return a.x[i] < a.x[j] } +func (a weightLabelSorter) Swap(i, j int) { + a.x[i], a.x[j] = a.x[j], a.x[i] + a.l[i], a.l[j] = a.l[j], a.l[i] + a.w[i], a.w[j] = a.w[j], a.w[i] +} + +// StdDev returns the sample standard deviation. +func StdDev(x, weights []float64) float64 { + _, std := MeanStdDev(x, weights) + return std +} + +// MeanStdDev returns the sample mean and unbiased standard deviation +// When weights sum to 1 or less, a biased variance estimator should be used. +func MeanStdDev(x, weights []float64) (mean, std float64) { + mean, variance := MeanVariance(x, weights) + return mean, math.Sqrt(variance) +} + +// StdErr returns the standard error in the mean with the given values. +func StdErr(std, sampleSize float64) float64 { + return std / math.Sqrt(sampleSize) +} + +// StdScore returns the standard score (a.k.a. z-score, z-value) for the value x +// with the given mean and standard deviation, i.e. +// +// (x - mean) / std +func StdScore(x, mean, std float64) float64 { + return (x - mean) / std +} + +// Variance computes the unbiased weighted sample variance: +// +// \sum_i w_i (x_i - mean)^2 / (sum_i w_i - 1) +// +// If weights is nil then all of the weights are 1. If weights is not nil, then +// len(x) must equal len(weights). +// When weights sum to 1 or less, a biased variance estimator should be used. +func Variance(x, weights []float64) float64 { + _, variance := MeanVariance(x, weights) + return variance +} + +// MeanVariance computes the sample mean and unbiased variance, where the mean and variance are +// +// \sum_i w_i * x_i / (sum_i w_i) +// \sum_i w_i (x_i - mean)^2 / (sum_i w_i - 1) +// +// respectively. +// If weights is nil then all of the weights are 1. If weights is not nil, then +// len(x) must equal len(weights). +// When weights sum to 1 or less, a biased variance estimator should be used. +func MeanVariance(x, weights []float64) (mean, variance float64) { + var ( + unnormalisedVariance float64 + sumWeights float64 + ) + mean, unnormalisedVariance, sumWeights = meanUnnormalisedVarianceSumWeights(x, weights) + return mean, unnormalisedVariance / (sumWeights - 1) +} + +// PopMeanVariance computes the sample mean and biased variance (also known as +// "population variance"), where the mean and variance are +// +// \sum_i w_i * x_i / (sum_i w_i) +// \sum_i w_i (x_i - mean)^2 / (sum_i w_i) +// +// respectively. +// If weights is nil then all of the weights are 1. If weights is not nil, then +// len(x) must equal len(weights). 
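+//
+// For example, PopMeanVariance([]float64{1, 2, 3}, nil) returns a mean of 2
+// and a variance of 2/3, where the unbiased MeanVariance would return a
+// variance of 1.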
+func PopMeanVariance(x, weights []float64) (mean, variance float64) { + var ( + unnormalisedVariance float64 + sumWeights float64 + ) + mean, unnormalisedVariance, sumWeights = meanUnnormalisedVarianceSumWeights(x, weights) + return mean, unnormalisedVariance / sumWeights +} + +// PopMeanStdDev returns the sample mean and biased standard deviation +// (also known as "population standard deviation"). +func PopMeanStdDev(x, weights []float64) (mean, std float64) { + mean, variance := PopMeanVariance(x, weights) + return mean, math.Sqrt(variance) +} + +// PopStdDev returns the population standard deviation, i.e., a square root +// of the biased variance estimate. +func PopStdDev(x, weights []float64) float64 { + _, stDev := PopMeanStdDev(x, weights) + return stDev +} + +// PopVariance computes the unbiased weighted sample variance: +// +// \sum_i w_i (x_i - mean)^2 / (sum_i w_i) +// +// If weights is nil then all of the weights are 1. If weights is not nil, then +// len(x) must equal len(weights). +func PopVariance(x, weights []float64) float64 { + _, variance := PopMeanVariance(x, weights) + return variance +} + +func meanUnnormalisedVarianceSumWeights(x, weights []float64) (mean, unnormalisedVariance, sumWeights float64) { + // This uses the corrected two-pass algorithm (1.7), from "Algorithms for computing + // the sample variance: Analysis and recommendations" by Chan, Tony F., Gene H. Golub, + // and Randall J. LeVeque. + + // Note that this will panic if the slice lengths do not match. + mean = Mean(x, weights) + var ( + ss float64 + compensation float64 + ) + if weights == nil { + for _, v := range x { + d := v - mean + ss += d * d + compensation += d + } + unnormalisedVariance = (ss - compensation*compensation/float64(len(x))) + return mean, unnormalisedVariance, float64(len(x)) + } + + for i, v := range x { + w := weights[i] + d := v - mean + wd := w * d + ss += wd * d + compensation += wd + sumWeights += w + } + unnormalisedVariance = (ss - compensation*compensation/sumWeights) + return mean, unnormalisedVariance, sumWeights +} diff --git a/vendor/gonum.org/v1/gonum/stat/statmat.go b/vendor/gonum.org/v1/gonum/stat/statmat.go new file mode 100644 index 0000000000..4f05f30645 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/stat/statmat.go @@ -0,0 +1,142 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package stat + +import ( + "math" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/mat" +) + +// CovarianceMatrix calculates the covariance matrix (also known as the +// variance-covariance matrix) calculated from a matrix of data, x, using +// a two-pass algorithm. The result is stored in dst. +// +// If weights is not nil the weighted covariance of x is calculated. weights +// must have length equal to the number of rows in input data matrix and +// must not contain negative elements. +// The dst matrix must either be empty or have the same number of +// columns as the input data matrix. +func CovarianceMatrix(dst *mat.SymDense, x mat.Matrix, weights []float64) { + // This is the matrix version of the two-pass algorithm. It doesn't use the + // additional floating point error correction that the Covariance function uses + // to reduce the impact of rounding during centering. 
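+	// As a consequence, an entry of the result can differ in its last few
+	// bits from the value Covariance would compute for the same two columns.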
+ + r, c := x.Dims() + + if dst.IsEmpty() { + *dst = *(dst.GrowSym(c).(*mat.SymDense)) + } else if n := dst.SymmetricDim(); n != c { + panic(mat.ErrShape) + } + + var xt mat.Dense + xt.CloneFrom(x.T()) + // Subtract the mean of each of the columns. + for i := 0; i < c; i++ { + v := xt.RawRowView(i) + // This will panic with ErrShape if len(weights) != len(v), so + // we don't have to check the size later. + mean := Mean(v, weights) + floats.AddConst(-mean, v) + } + + if weights == nil { + // Calculate the normalization factor + // scaled by the sample size. + dst.SymOuterK(1/(float64(r)-1), &xt) + return + } + + // Multiply by the sqrt of the weights, so that multiplication is symmetric. + sqrtwts := make([]float64, r) + for i, w := range weights { + if w < 0 { + panic("stat: negative covariance matrix weights") + } + sqrtwts[i] = math.Sqrt(w) + } + // Weight the rows. + for i := 0; i < c; i++ { + v := xt.RawRowView(i) + floats.Mul(v, sqrtwts) + } + + // Calculate the normalization factor + // scaled by the weighted sample size. + dst.SymOuterK(1/(floats.Sum(weights)-1), &xt) +} + +// CorrelationMatrix returns the correlation matrix calculated from a matrix +// of data, x, using a two-pass algorithm. The result is stored in dst. +// +// If weights is not nil the weighted correlation of x is calculated. weights +// must have length equal to the number of rows in input data matrix and +// must not contain negative elements. +// The dst matrix must either be empty or have the same number of +// columns as the input data matrix. +func CorrelationMatrix(dst *mat.SymDense, x mat.Matrix, weights []float64) { + // This will panic if the sizes don't match, or if weights is the wrong size. + CovarianceMatrix(dst, x, weights) + covToCorr(dst) +} + +// covToCorr converts a covariance matrix to a correlation matrix. +func covToCorr(c *mat.SymDense) { + r := c.SymmetricDim() + + s := make([]float64, r) + for i := 0; i < r; i++ { + s[i] = 1 / math.Sqrt(c.At(i, i)) + } + for i, sx := range s { + // Ensure that the diagonal has exactly ones. + c.SetSym(i, i, 1) + for j := i + 1; j < r; j++ { + v := c.At(i, j) + c.SetSym(i, j, v*sx*s[j]) + } + } +} + +// corrToCov converts a correlation matrix to a covariance matrix. +// The input sigma should be vector of standard deviations corresponding +// to the covariance. It will panic if len(sigma) is not equal to the +// number of rows in the correlation matrix. +func corrToCov(c *mat.SymDense, sigma []float64) { + r, _ := c.Dims() + + if r != len(sigma) { + panic(mat.ErrShape) + } + for i, sx := range sigma { + // Ensure that the diagonal has exactly sigma squared. + c.SetSym(i, i, sx*sx) + for j := i + 1; j < r; j++ { + v := c.At(i, j) + c.SetSym(i, j, v*sx*sigma[j]) + } + } +} + +// Mahalanobis computes the Mahalanobis distance +// +// D = sqrt((x-y)ᵀ * Σ^-1 * (x-y)) +// +// between the column vectors x and y given the cholesky decomposition of Σ. +// Mahalanobis returns NaN if the linear solve fails. +// +// See https://en.wikipedia.org/wiki/Mahalanobis_distance for more information. 
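+//
+// A minimal sketch of a call site, assuming sigma is a *mat.SymDense holding Σ:
+//
+//	var chol mat.Cholesky
+//	if !chol.Factorize(sigma) {
+//		// Σ is not positive definite; fall back or bail out.
+//	}
+//	d := Mahalanobis(x, y, &chol)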
+func Mahalanobis(x, y mat.Vector, chol *mat.Cholesky) float64 {
+	var diff mat.VecDense
+	diff.SubVec(x, y)
+	var tmp mat.VecDense
+	err := chol.SolveVecTo(&tmp, &diff)
+	if err != nil {
+		return math.NaN()
+	}
+	return math.Sqrt(mat.Dot(&tmp, &diff))
+}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 017c1a3b7d..ae92659462 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -616,6 +616,7 @@ golang.org/x/text/unicode/norm
 golang.org/x/time/rate
 # golang.org/x/tools v0.30.0
 ## explicit; go 1.22.0
+golang.org/x/tools/container/intsets
 golang.org/x/tools/cover
 golang.org/x/tools/go/ast/astutil
 golang.org/x/tools/go/ast/inspector
@@ -644,6 +645,34 @@ golang.org/x/tools/internal/versions
 # gomodules.xyz/jsonpatch/v2 v2.4.0
 ## explicit; go 1.20
 gomodules.xyz/jsonpatch/v2
+# gonum.org/v1/gonum v0.16.0
+## explicit; go 1.23.0
+gonum.org/v1/gonum/blas
+gonum.org/v1/gonum/blas/blas64
+gonum.org/v1/gonum/blas/cblas128
+gonum.org/v1/gonum/blas/gonum
+gonum.org/v1/gonum/floats
+gonum.org/v1/gonum/floats/scalar
+gonum.org/v1/gonum/internal/asm/c128
+gonum.org/v1/gonum/internal/asm/c64
+gonum.org/v1/gonum/internal/asm/f32
+gonum.org/v1/gonum/internal/asm/f64
+gonum.org/v1/gonum/internal/cmplx64
+gonum.org/v1/gonum/internal/math32
+gonum.org/v1/gonum/lapack
+gonum.org/v1/gonum/lapack/gonum
+gonum.org/v1/gonum/lapack/lapack64
+gonum.org/v1/gonum/mat
+gonum.org/v1/gonum/mathext
+gonum.org/v1/gonum/mathext/internal/amos
+gonum.org/v1/gonum/mathext/internal/cephes
+gonum.org/v1/gonum/mathext/internal/gonum
+gonum.org/v1/gonum/optimize
+gonum.org/v1/gonum/spatial/r1
+gonum.org/v1/gonum/stat
+gonum.org/v1/gonum/stat/combin
+gonum.org/v1/gonum/stat/distmv
+gonum.org/v1/gonum/stat/distuv
 # google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e
 ## explicit; go 1.23.0
 google.golang.org/genproto/googleapis/rpc/status

From 04da524254e357c80e2feffdfc6354712979639f Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Wed, 16 Jul 2025 19:25:08 +0200
Subject: [PATCH 2/6] WIP: SMT postprocessing

Round the reserved CPU count up to a multiple of the SMT level as an
explicit postprocessing step, applied only after the optimizer result
has been validated, instead of folding the SMT constraint into the
optimization itself.

Signed-off-by: Francesco Romani
---
 .../profilecreator/autosize/autosize.go | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/pkg/performanceprofile/profilecreator/autosize/autosize.go b/pkg/performanceprofile/profilecreator/autosize/autosize.go
index 5be9800ec6..5857c7ad39 100644
--- a/pkg/performanceprofile/profilecreator/autosize/autosize.go
+++ b/pkg/performanceprofile/profilecreator/autosize/autosize.go
@@ -215,28 +215,40 @@ func Compute(env Env, params Params) (Values, Score, error) {
 		return params.DefaultAllocation(), Score{}, err
 	}
 
-	smtLevel := params.SMTLevel()
 	totCPUs := params.TotalCPUs()
 
 	score := Score{Cost: result.F}
-	x_cr := int(math.Round(result.Location.X[0]))
-	x_c := asMultipleOf(x_cr, smtLevel)
-	env.Log.Printf("Optimization value: Xc=%v -> Xc=%v (SMTLevel=%v)", x_cr, x_c, smtLevel)
+	x_c := int(math.Round(result.Location.X[0]))
 
-	vals := Values{
+	opt := Values{
 		ReservedCPUCount: x_c,
 		IsolatedCPUCount: totCPUs - x_c, // we can use x_w, but we just leverage invariants
 	}
-	env.Log.Printf("Optimization result: %s", vals.String())
+	env.Log.Printf("Optimization result: %s", opt.String())
 
-	if err := Validate(params, vals); err != nil {
+	if err := Validate(params, opt); err != nil {
 		env.Log.Printf("Optimization invalid: %v", err)
 		return params.DefaultAllocation(), Score{}, err
 	}
 
+	// postprocessing must be done after successful validation
+	vals := postProcess(params, opt)
+	env.Log.Printf("Optimization postprocess. %s => %s", opt.String(), vals.String())
+
 	env.Log.Printf("Optimization done. Score: %v %s totalCPUs=%d", score.String(), vals.String(), totCPUs)
 	return vals, score, nil
 }
 
+func postProcess(params Params, vals Values) Values {
+	Tc := params.TotalCPUs()
+	sl := params.SMTLevel()
+	x_c := asMultipleOf(vals.ReservedCPUCount, sl)
+	ret := Values{
+		ReservedCPUCount: x_c,
+		IsolatedCPUCount: Tc - x_c,
+	}
+	return ret
+}
+
 func asMultipleOf(v, x int) int {
 	r := v % x
 	if r == 0 {

From 6d449223d8fac58b9c49ec32af682da9c9fbf368 Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Wed, 16 Jul 2025 16:41:52 +0200
Subject: [PATCH 3/6] autosize: handle SMT in autosizing

Consider the real SMT level when doing autosize computations.

Signed-off-by: Francesco Romani
---
 pkg/performanceprofile/profilecreator/autosize/autosize.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/performanceprofile/profilecreator/autosize/autosize.go b/pkg/performanceprofile/profilecreator/autosize/autosize.go
index 5857c7ad39..916c6f24eb 100644
--- a/pkg/performanceprofile/profilecreator/autosize/autosize.go
+++ b/pkg/performanceprofile/profilecreator/autosize/autosize.go
@@ -91,7 +91,7 @@ func (p Params) SMTLevel() int {
 func (p Params) DefaultControlPlaneCores() int {
 	// intentionally overallocate to have a safe baseline
 	Tc := p.totalCPUs
-	return int(math.Round(float64(Tc) * defaultReservedRatioInitial)) // TODO handle SMT
+	return int(math.Round(float64(Tc) * defaultReservedRatioInitial))
 }
 
 // Get x_c, x_w as initial hardcoded value. Subject to optimization
@@ -147,7 +147,7 @@ func (vals Values) String() string {
 // https://github.com/gonum/gonum/issues/1725
 func Validate(params Params, vals Values) error {
 	Tc := params.TotalCPUs()
-	if vals.ReservedCPUCount < 1 { // TODO handle SMT
+	if vals.ReservedCPUCount < params.SMTLevel() {
 		return ErrUnderallocatedControlPlane
 	}
 	if vals.ReservedCPUCount > int(math.Round((float64(Tc) * defaultReservedRatioMax))) { // works, but likely unacceptable

From cc52c5f1bfd0f60b84fe332ca92de76becca2dab Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Wed, 16 Jul 2025 18:30:06 +0200
Subject: [PATCH 4/6] WIP: drop the SMT multiple penalty from the objective

The SMT alignment of the reserved CPU count is now enforced in the
postprocessing step, so the objective no longer needs a hard penalty
for values that are not SMT-aligned; also take the isolated CPU count
directly from the optimizer result instead of deriving it from the
total.

Signed-off-by: Francesco Romani
---
 .../profilecreator/autosize/autosize.go | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/pkg/performanceprofile/profilecreator/autosize/autosize.go b/pkg/performanceprofile/profilecreator/autosize/autosize.go
index 916c6f24eb..1798b8cb3b 100644
--- a/pkg/performanceprofile/profilecreator/autosize/autosize.go
+++ b/pkg/performanceprofile/profilecreator/autosize/autosize.go
@@ -183,9 +183,6 @@ func objective(p Params, x []float64) float64 {
 	// Must use positive CPU values (since gonum/optimize doesn't have simple bounds for all solvers)
 	hardPenalty += defaultPenaltyWeight*math.Pow(math.Max(0, -x_c), 2) + math.Pow(math.Max(0, -x_w), 2)
 
-	// Allocate in multiples of SMT level (usually 2) -- TODO: should be soft?
- hardPenalty += defaultPenaltyWeight * math.Pow(math.Max(0, -float64(int(math.Round(x_c))%p.SMTLevel())), 2) - return target + hardPenalty } @@ -217,11 +214,9 @@ func Compute(env Env, params Params) (Values, Score, error) { totCPUs := params.TotalCPUs() score := Score{Cost: result.F} - x_c := int(math.Round(result.Location.X[0])) - opt := Values{ - ReservedCPUCount: x_c, - IsolatedCPUCount: totCPUs - x_c, // we can use x_w, but we just leverage invariants + ReservedCPUCount: int(math.Round(result.Location.X[0])), + IsolatedCPUCount: int(math.Round(result.Location.X[1])), } env.Log.Printf("Optimization result: %s", opt.String()) From 877818c797bebb3432e7fbd84cd93bd081c61ba4 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 16 Jul 2025 17:16:25 +0200 Subject: [PATCH 5/6] WIP: prevalidate and postvalidate Signed-off-by: Francesco Romani --- .../profilecreator/autosize/autosize.go | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/pkg/performanceprofile/profilecreator/autosize/autosize.go b/pkg/performanceprofile/profilecreator/autosize/autosize.go index 1798b8cb3b..e22dff6961 100644 --- a/pkg/performanceprofile/profilecreator/autosize/autosize.go +++ b/pkg/performanceprofile/profilecreator/autosize/autosize.go @@ -38,6 +38,7 @@ const ( ) var ( + ErrInvalidParameters = errors.New("invalid parameters") ErrUnderallocatedControlPlane = errors.New("not enough CPUs for control plane") ErrOverallocatedControlPlane = errors.New("too many CPUs for control plane") ErrInconsistentAllocation = errors.New("inconsistent CPus allocation") @@ -54,6 +55,7 @@ func DefaultEnv() Env { } type Params struct { + DeviceCount int OfflinedCPUCount int UserLevelNetworking bool MachineData *profilecreator.GHWHandler @@ -63,7 +65,7 @@ type Params struct { } func (p Params) String() string { - return fmt.Sprintf("cpus=%d offline=%v SMTLevel=%v", p.totalCPUs, p.OfflinedCPUCount, p.smtLevel) + return fmt.Sprintf("cpus=%d offline=%v SMTLevel=%v devices=%d (userNetworking=%v)", p.totalCPUs, p.OfflinedCPUCount, p.smtLevel, p.DeviceCount, p.UserLevelNetworking) } func setupMachineData(p *Params) error { @@ -143,9 +145,23 @@ func (vals Values) String() string { return fmt.Sprintf("reserved=%v/isolated=%v", vals.ReservedCPUCount, vals.IsolatedCPUCount) } +func CheckParameters(params Params) error { + if params.DeviceCount < 0 { + return ErrInvalidParameters + } + if params.OfflinedCPUCount < 0 { + return ErrInvalidParameters + } + // are we offlining everything? 
we need at least 1 physical core to do any work, including staying alive + if params.OfflinedCPUCount > (params.totalCPUs - params.smtLevel) { + return ErrInvalidParameters + } + return nil +} + // gonum doesn't support bounds yet so we have to make this an explicit step // https://github.com/gonum/gonum/issues/1725 -func Validate(params Params, vals Values) error { +func CheckValues(params Params, vals Values) error { Tc := params.TotalCPUs() if vals.ReservedCPUCount < params.SMTLevel() { return ErrUnderallocatedControlPlane @@ -187,7 +203,11 @@ func objective(p Params, x []float64) float64 { } func Compute(env Env, params Params) (Values, Score, error) { - err := setupMachineData(¶ms) + err := CheckParameters(params) + if err != nil { + return params.DefaultAllocation(), Score{}, err + } + err = setupMachineData(¶ms) if err != nil { env.Log.Printf("Optimization failed: %v", err) return params.DefaultAllocation(), Score{}, err @@ -220,7 +240,7 @@ func Compute(env Env, params Params) (Values, Score, error) { } env.Log.Printf("Optimization result: %s", opt.String()) - if err := Validate(params, opt); err != nil { + if err := CheckValues(params, opt); err != nil { env.Log.Printf("Optimization invalid: %v", err) return params.DefaultAllocation(), Score{}, err } From 6005e98bd3dba329d3c6b27e3cbe6871f17204c2 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 16 Jul 2025 17:16:43 +0200 Subject: [PATCH 6/6] WIP: minimal CPU to fit IRQ count Signed-off-by: Francesco Romani --- .../profilecreator/autosize/autosize.go | 18 +++++++++++++++++- .../profilecreator/cmd/root.go | 3 +++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pkg/performanceprofile/profilecreator/autosize/autosize.go b/pkg/performanceprofile/profilecreator/autosize/autosize.go index e22dff6961..856a339b68 100644 --- a/pkg/performanceprofile/profilecreator/autosize/autosize.go +++ b/pkg/performanceprofile/profilecreator/autosize/autosize.go @@ -30,6 +30,12 @@ import ( // Objective: // We want to maximize x_w, or, equivalently, minimize x_c +const ( + // x86 limit. 256 hardware entries, of those 32 reserved. 256-32 = 224. + // see: https://en.wikipedia.org/wiki/Interrupt_request + maxIRQsPerPhysicalCore int = 224 +) + const ( defaultPenaltyWeight float64 = 100.0 defaultReservedRatioInitial float64 = 0.0625 // 1/16. determined empirically. Use only as initial value. 
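(A quick sanity check of the IRQ-driven floor introduced by this patch: the
MinCPUs helper added in the next hunk is an integer ceiling division by
maxIRQsPerPhysicalCore, so with 224 IRQs per physical core a DeviceCount of
1..224 needs one reserved CPU, 225..448 needs two, and so on; for instance
(500 + 223) / 224 = 3 in integer arithmetic, i.e. ceil(500/224).)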
@@ -65,7 +71,7 @@ type Params struct { } func (p Params) String() string { - return fmt.Sprintf("cpus=%d offline=%v SMTLevel=%v devices=%d (userNetworking=%v)", p.totalCPUs, p.OfflinedCPUCount, p.smtLevel, p.DeviceCount, p.UserLevelNetworking) + return fmt.Sprintf("cpus=%d offline=%v SMTLevel=%v devices=%d (req=%v userNetworking=%v)", p.totalCPUs, p.OfflinedCPUCount, p.smtLevel, p.DeviceCount, p.MinCPUs(), p.UserLevelNetworking) } func setupMachineData(p *Params) error { @@ -82,6 +88,13 @@ func setupMachineData(p *Params) error { return nil } +func (p Params) MinCPUs() int { + if !p.UserLevelNetworking { // TODO explain why + return 0 + } + return (p.DeviceCount + (maxIRQsPerPhysicalCore - 1)) / maxIRQsPerPhysicalCore +} + func (p Params) TotalCPUs() int { return p.totalCPUs } @@ -193,6 +206,9 @@ func objective(p Params, x []float64) float64 { // Don't exceed total CPUs hardPenalty += defaultPenaltyWeight * math.Pow(math.Max(0, x_c+x_w-float64(p.TotalCPUs())), 2) + // Allocate as minimum what is needed to fit the desired amount of devices, thus IRQs + hardPenalty += defaultPenaltyWeight * math.Pow(math.Max(0, float64(p.MinCPUs())-x_c), 2) + // Meet the control plane/infra requirement to avoid the workload to starve hardPenalty += defaultPenaltyWeight * math.Pow(math.Max(0, p.controlPlaneRequirement(x_w)-x_c), 2) diff --git a/pkg/performanceprofile/profilecreator/cmd/root.go b/pkg/performanceprofile/profilecreator/cmd/root.go index 7b70f291ae..a703757678 100644 --- a/pkg/performanceprofile/profilecreator/cmd/root.go +++ b/pkg/performanceprofile/profilecreator/cmd/root.go @@ -171,6 +171,7 @@ func NewRootCommand() *cobra.Command { } if isAutosizeEnabled(pcArgs) { params := autosize.Params{ + DeviceCount: pcArgs.DeviceCount, OfflinedCPUCount: pcArgs.OfflinedCPUCount, UserLevelNetworking: (pcArgs.UserLevelNetworking != nil && *pcArgs.UserLevelNetworking), MachineData: nodesHandlers[0], // assume all nodes equal, pick the easiest @@ -433,6 +434,7 @@ type ProfileCreatorArgs struct { PerPodPowerManagement *bool `json:"per-pod-power-management,omitempty"` EnableHardwareTuning bool `json:"enable-hardware-tuning,omitempty"` Autosize *bool `json:"autosize,omitempty"` + DeviceCount int `json:"device-count,omitempty"` // internal only this argument not passed by the user // but detected automatically createForHypershift bool @@ -454,6 +456,7 @@ func (pca *ProfileCreatorArgs) AddFlags(flags *pflag.FlagSet) { flags.BoolVar(&pca.EnableHardwareTuning, "enable-hardware-tuning", false, "Enable setting maximum cpu frequencies") flags.StringVar(&pca.NodePoolName, "node-pool-name", "", "Node pool name corresponding to the target machines (HyperShift only)") flags.BoolVar(pca.Autosize, "autosize", false, "autosize the control plane") + flags.IntVar(&pca.DeviceCount, "device-count", 0, "Number of expected devices (TODO)") } func makePerformanceProfileFrom(profileData ProfileData) (runtime.Object, error) {
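
Taken together, the series sizes the reserved pool by minimizing a scalar cost
in which each constraint appears as a quadratic penalty term, since
gonum/optimize has no native bound support. The following self-contained toy
sketch shows that pattern; the total, the floor and the weight are
illustrative stand-ins rather than the values used by autosize.go, and
NelderMead is just a convenient derivative-free choice for the sketch, not
necessarily the method Compute uses:

	package main

	import (
		"fmt"
		"math"

		"gonum.org/v1/gonum/optimize"
	)

	func main() {
		const (
			total       = 64.0  // CPUs to split between reserved (xc) and workload (xw)
			minReserved = 4.0   // illustrative floor for the reserved pool
			weight      = 100.0 // penalty weight, in the spirit of defaultPenaltyWeight
		)
		problem := optimize.Problem{
			Func: func(x []float64) float64 {
				xc, xw := x[0], x[1]
				cost := xc                                                // objective: keep the reserved pool small
				cost += weight * math.Pow(xc+xw-total, 2)                 // the partition must add up to the total
				cost += weight * math.Pow(math.Max(0, minReserved-xc), 2) // floor on the reserved pool
				cost += weight * math.Pow(math.Max(0, -xw), 2)            // workload CPUs must stay non-negative
				return cost
			},
		}
		x0 := []float64{total / 16, total - total/16} // mirrors the 1/16 initial ratio
		result, err := optimize.Minimize(problem, x0, nil, &optimize.NelderMead{})
		if err != nil {
			panic(err)
		}
		fmt.Printf("xc=%.2f xw=%.2f cost=%.4f\n", result.Location.X[0], result.Location.X[1], result.F)
	}

The real objective adds more terms of the same max(0, violation)^2 shape
(total-CPU budget, control-plane requirement, IRQ-driven minimum), but the
structure of the minimization is the same.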