From e3c5666ceff68c9a60ac2693efb78c8d3619dea8 Mon Sep 17 00:00:00 2001 From: TaoChenImperial Date: Mon, 10 Nov 2025 16:04:27 +0000 Subject: [PATCH 01/19] fast qr for banded plus semiseparable matrices --- Project.toml | 6 +- paper/main.tex | 665 +++++++++++++++++++++++++ src/BandedPlusSemiseparableMatrices.jl | 376 ++++++++++++++ test/test_QR.jl | 29 ++ test/test_getindex.jl | 64 +++ 5 files changed, 1138 insertions(+), 2 deletions(-) create mode 100644 paper/main.tex create mode 100644 src/BandedPlusSemiseparableMatrices.jl create mode 100644 test/test_QR.jl create mode 100644 test/test_getindex.jl diff --git a/Project.toml b/Project.toml index 73ce137..14ff321 100644 --- a/Project.toml +++ b/Project.toml @@ -1,5 +1,5 @@ -name = "SemiseparableMatrices" -uuid = "f8ebbe35-cbfb-4060-bf7f-b10e4670cf57" +name = "BandedPlusSemiseparableMatrices" +uuid = "baa84449-aec6-4dc3-ad95-190cb16cdd31" authors = ["Sheehan Olver "] version = "0.4.0" @@ -10,6 +10,8 @@ BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MatrixFactorizations = "a3b82374-2e81-5b9e-98ce-41277c0e4c87" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" +SemiseparableMatrices = "f8ebbe35-cbfb-4060-bf7f-b10e4670cf57" [compat] ArrayLayouts = "1.0" diff --git a/paper/main.tex b/paper/main.tex new file mode 100644 index 0000000..851b6d8 --- /dev/null +++ b/paper/main.tex @@ -0,0 +1,665 @@ +\documentclass{article} +\usepackage{graphicx} % Required for inserting images +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{amsthm} +\usepackage{algorithm} +\usepackage{algpseudocode} +\newtheorem{theorem}{Theorem} +\newtheorem{lemma}{Lemma} +\newtheorem{definition}{Definition} +\title{A fast QR decomposition for banded plus semiseparable matrices} +\author{Tao Chen} + + +\begin{document} + +\maketitle + +Consider an $n$ by $n$ banded plus semiseparable matrix $A$ with rank $r$ in its lower triangular part and rank $p$ in its upper triangular part, meanwhile, the banded part has lower bandwidth $l$ and upper bandwidth $m$. + +$A$ can be formulated as + +\begin{equation} +A:=tril(UV^T, -1) + B + triu(WS^T, 1)\label{express_A} +\end{equation} + +where $U \in \mathbb{R}^{n\times r}$, $V\in \mathbb{R}^{n\times r}$, $W\in \mathbb{R}^{n\times p}$, and $S\in \mathbb{R}^{n \times p}$. Also, $B$ is a banded matrix with lower bandwidth $l$ and upper bandwidth $m$, which can be represented as $B:=(b_{ij})_{i,j=1}^n\in \mathbb{R}^{n,n}$ with $b_{ij}=0$ if $i-j>l$ or $j-i>m$. + +If we define $\bar{\mathbf{u}}_i:=U[i,:]^T\in \mathbb{R}^r$, $\bar{\mathbf{v}}_i:=V[i,:]^T\in \mathbb{R}^r$, $\bar{\mathbf{w}}_i:=W[i,:]^T\in \mathbb{R}^p$, and $\bar{\mathbf{s}}_i:=S[i,:]^T\in \mathbb{R}^p$ for $i=1,...,n$, we have: +\begin{equation} +A = +\begin{bmatrix} +b_{11} & \bar{\mathbf{w}}_1^T \bar{\mathbf{s}}_2 + b_{12} & \bar{\mathbf{w}}_1^T \bar{\mathbf{s}}_3 + b_{13} & \cdots & \bar{\mathbf{w}}_1^T \bar{\mathbf{s}}_n + b_{1n} \\ +\bar{\mathbf{u}}_2^T \bar{\mathbf{v}}_1 + b_{21} & b_{22} & \bar{\mathbf{w}}_2^T \bar{\mathbf{s}}_3 + b_{23} & \cdots & \bar{\mathbf{w}}_2^T \bar{\mathbf{s}}_n + b_{2n} \\ +\bar{\mathbf{u}}_3^T \bar{\mathbf{v}}_1 + b_{31} & \bar{\mathbf{u}}_3^T \bar{\mathbf{v}}_2 + b_{32} & b_{33} & \cdots & \bar{\mathbf{w}}_3^T \bar{\mathbf{s}}_n + b_{3n} \\ +\vdots & \vdots & \vdots & \ddots & \vdots \\ +\bar{\mathbf{u}}_n^T \bar{\mathbf{v}}_1 + b_{n1} & \bar{\mathbf{u}}_n^T \bar{\mathbf{v}}_2 + b_{n2} & \bar{\mathbf{u}}_n^T \bar{\mathbf{v}}_3 + b_{n3} & \cdots & b_{nn} +\end{bmatrix} +\end{equation} + +After applying QR decomposition to $A$, the resulting factor matrix $F$, in which the upper triangular portion stores the matrix $R$, and the lower triangular portion contains the Householder reflection vectors $\mathbf{y}$ generated during the decomposition processretain, is again a banded plus semiseparable structure. Specifically, the lower semiseparable part has rank $r$, the upper semiseparable part has rank $r+p$, the lower bandwidth is $l$, and the upper bandwidth is $l+m$. + +Next, we demonstrate by induction why the factor matrix $F$ mantains such a banded plus semiseparable structure. + +Before we start, an important notation will be: for a matrix $S$, let $S[i:j,m:n]$ represent the submatrix of $S$ from row $i$ to row $j$ and from column $m$ to column $n$. When $i=j$ or $m=n$, the notation will be simplified as $S[i,m:n]$ or $S[i:j,m]$. + +Let's first introduce a definition and two very helpful lemmas: + +\begin{definition}[Householder-modified banded-plus-semiseparable matrix] + Given an $n\times n$ banded-plus-semiseparable matrix(bpsm) $A=tril(UV^T, -1) + B + triu(WS^T, 1)$ where $U\in \mathbb{R}^{n\times r}$, $V\in \mathbb{R}^{n\times r}$, $W \in \mathbb{R}^{n\times p}$, $S \in \mathbb{R}^{n\times r}$, and $B$ a banded matrix with lower bandwidth $l$ and upper bandwidth $m$, a matrix $C$ is called a Householder-modified banded-plus-semiseparable matrix(hmbpsm) to $A$ if + + \begin{equation} + C=A+UQS^T+UKU^TA+UE+XS^T+YU^TA+Z + \end{equation} + for some $Q\in \mathbb{R}^{r\times p}$, $K\in \mathbb{R}^{r\times r}$, $E=[E_s,\mathbf{0}]\in \mathbb{R}^{r \times n}$ with $E_s\in \mathbb{R}^{r\times \min(l+m,n)}$; $X=\begin{bmatrix} +X_s \\ \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n\times p}$ with $X_s\in \mathbb{R}^{\min(l,n)\times p}$; $Y=\begin{bmatrix} +Y_s \\ \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n \times r}$ with $Y_s\in \mathbb{R}^{\min(l,n)\times r}$; and $Z=\begin{bmatrix} +Z_s & \mathbf{0} \\ \mathbf{0} & \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n \times n}$ with $Z_s \in \mathbb{R}^{\min(l,n)\times \min(l+m,n)}$. +\end{definition} + +\begin{definition}[Householder-modified banded-plus-semiseparable vector] + +Let $A \in \mathbb{R}^{n \times n}$ be a banded-plus-semiseparable matrix (bpsm) as defined previously. A vector $\mathbf{c}$ of length $n-1$ is called a Householder-modified banded-plus-semiseparable vector(hmbpsv) to $A$ if + \begin{equation} + \mathbf{c}^T=\mathbf{d}^T+\boldsymbol{\alpha}^T (S^T[:,2:n])+ \boldsymbol{\beta}^T ((U^TA)[:,2:n]) + \end{equation} +for some $\mathbf{d}=\begin{bmatrix} +\mathbf{d}_s \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{n-1}$ with $\mathbf{d}_s\in \mathbb{R}^{\min(l+m,n)}$, $\boldsymbol{\alpha}\in \mathbb{R}^p$, and $\boldsymbol{\beta}\in \mathbb{R}^r$. +\end{definition} + +\begin{lemma} \label{helpful_lemma} + +Given a bpsm $A$ and its hmbpsm $C$, Suppose a Householder transformation is applied to $C$ to eliminate +the subdiagonal entries of its first column, and denote the resulting matrix by +$\tilde{C}$. Then the following hold: +\begin{enumerate} + \item The submatrix $\tilde{C}[2:n,\,2:n]$ + is an hmbpsm to $A[2:n,\,2:n]$. + \item + $\tilde{C}[1,\,2:n]$ is an hmbpsv to $A$. +\end{enumerate} +\end{lemma} + + +\begin{proof} + +Let's first introduce some notations: + +$A:=tril(UV^T, -1) + B + triu(WS^T, 1)$ where $U=(\mathbf{u}_1,...,\mathbf{u}_r) \in \mathbb{R}^{n\times r}$, $V=(\mathbf{v}_1,...,\mathbf{v}_r)\in \mathbb{R}^{n\times r}$, $W=(\mathbf{w}_1,...,\mathbf{w}_p)\in \mathbb{R}^{n\times p}$, and $S=(\mathbf{s}_1,...,\mathbf{s}_p)\in \mathbb{R}^{n \times p}$. Here $\mathbf{u}_i=(u_1^{(i)},...,u_n^{(i)})^T\in \mathbb{R}^n$ and $\mathbf{v}_i=(v_1^{(i)},...,v_n^{(i)})^T\in \mathbb{R}^n$ for $i=1,...,r$; $\mathbf{w}_i=(w_1^{(i)},...,w_n^{(i)})^T\in \mathbb{R}^n$ and $\mathbf{s}_i=(s_1^{(i)},...,s_n^{(i)})^T\in \mathbb{R}^n$ for $i=1,...,p$. Also, $B=(b_{ij})_{i,j=1}^n\in \mathbb{R}^{n,n}$ with $b_{ij}=0$ if $i-j>l$ or $j-i>m$. + +$C:=A+UQS^T+UKU^TA+UE+XS^T+YU^TA+Z$ where $Q\in \mathbb{R}^{r\times p}$ and $K\in \mathbb{R}^{r\times r}$; $E=[E_s,\mathbf{0}]\in \mathbb{R}^{r \times n}$ with $E_s\in \mathbb{R}^{r\times\min(l+m,n)}$; $X=\begin{bmatrix} +X_s \\ \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n\times p}$ with $X_s\in \mathbb{R}^{\min(l,n)\times p}$; $Y=\begin{bmatrix} +Y_s \\ \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n \times r}$ with $Y_s\in \mathbb{R}^{\min(l,n)\times r}$; $Z=\begin{bmatrix} +Z_s & \mathbf{0} \\ \mathbf{0} & \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n \times n}$ with $Z_s \in \mathbb{R}^{\min(l,n)\times\min(l+m,n)}$. + +$\tilde{C}:=(I-\tau\mathbf{y}\mathbf{y}^T)C$ where $I-\tau\mathbf{y}\mathbf{y}^T$ is a Householder transformation for eliminating the first column of $C$(below the diagonal). Here $\tau$ is a coefficient. From the expression of $C$, it is easy to see that $\mathbf{y}$ can be expressed as $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$ with $\mathbf{e}_1=(1,0,...,0)^T\in \mathbb{R}^n$, $U^{(2)}\in \mathbb{R}^{n\times r}$ s.t. $U^{(2)}[1,:]=0$ and $U^{(2)}[2:n,:]=U[2:n,:]$, $\bar{\mathbf{k}}=(\bar{k}_1,...,\bar{k}_r)^T\in \mathbb{R}^r$, and $\mathbf{b}=(0,b_2,...,b_{min(l+1,n)},0,...,0)^T\in \mathbb{R}^n$. + +Let $\bar{\mathbf{u}}_1:=(u_1^{(1)},...,u_1^{(r)})^T\in \mathbb{R}^r$; write $$\mathbf{e}_1^TA=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS^T$$ where $\mathbf{d}_1=B[1,:]^T\in \mathbb{R}^n$, $\bar{\mathbf{w}}_1=(w_1^{(1)},...,w_1^{(p)})^T\in \mathbb{R}^p$, + +and $$\mathbf{b}^TA=\bar{\mathbf{d}}^T+\mathbf{f}^TS^T$$ where $\bar{\mathbf{d}}=(\bar{d}_1,...,\bar{d}_{\min(l+m+1,n)},0,...,0)^T\in \mathbb{R}^n$, $\mathbf{f}=W^T\mathbf{b}\in \mathbb{R}^{p}$. + +Next, define the following $6$ variables: $\mathbf{c}_1:=Q^TU^T\mathbf{y} \in \mathbb{R}^p$, $\mathbf{c}_2:=K^TU^T\mathbf{y} \in \mathbb{R}^r$, $\mathbf{c}_3:=U^T\mathbf{y} \in \mathbb{R}^r$, $\mathbf{c}_4:=X^T\mathbf{y} \in \mathbb{R}^p$, $\mathbf{c}_5:=Y^T\mathbf{y} \in \mathbb{R}^r$ and +$\mathbf{c}_6:=Z^T\mathbf{y}\in \mathbb{R}^{n}$. + +It is easy to see that $\mathbf{x}$ can be written as $\mathbf{c}_6=\begin{bmatrix} +\mathbf{c}_{6s} \\ \mathbf{0} \end{bmatrix}$ with $\mathbf{c}_{6s}\in \mathbb{R}^{\min(l+m,n)}$. + +At last, let $\mathbf{x}^{(1)}:=X[1,:]^T\in \mathbb{R}^p$, $\mathbf{y}^{(1)}:=Y[1,:]^T\in \mathbb{R}^r$, and $\mathbf{z}^{(1)}:=Z[1,:]^T\in \mathbb{R}^n$. + +To compute $(I-\tau\mathbf{y}\mathbf{y}^T)C$ where $C:=A+UQS^T+UKU^TA+UE+XS^T+YU^TA+Z$, we compute $(I-\tau\mathbf{y}\mathbf{y}^T)A$, $(I-\tau\mathbf{y}\mathbf{y}^T)UQS^T$, $(I-\tau\mathbf{y}\mathbf{y}^T)UKU^TA$, $(I-\tau\mathbf{y}\mathbf{y}^T)UE$, $(I-\tau\mathbf{y}\mathbf{y}^T)XS^T$, $(I-\tau\mathbf{y}\mathbf{y}^T)YU^TA$, and $(I-\tau\mathbf{y}\mathbf{y}^T)Z$ separately. + +(i) By writing $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, $\mathbf{e}_1^TA=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS^T$, and $\mathbf{b}^TA=\bar{\mathbf{d}}^T+\mathbf{f}^TS^T$, we get + +\begin{equation}\label{proof_first} + \begin{aligned} + (I-\tau\mathbf{y}\mathbf{y}^T)A=&A+\mathbf{e_1}(-\tau\mathbf{d}_1^T-\tau\bar{\mathbf{d}}^T)+\mathbf{e_1}(-\tau\bar{\mathbf{w}}_1^T-\tau\mathbf{f}^T)S^T\\&+\mathbf{e}_1(-\tau\bar{\mathbf{k}}^T)U^{(2)T}A+U^{(2)}(-\tau\bar{\mathbf{k}}\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{k}}\mathbf{f}^T)S^T\\& + +U^{(2)}(-\tau\bar{\mathbf{k}}\bar{\mathbf{k}}^T)U^{(2)T}A+U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{d}_1^T-\tau\bar{\mathbf{k}}\bar{\mathbf{d}}^T)\\& + +(-\tau \mathbf{b}\bar{\mathbf{w}}_1^T-\tau\mathbf{b}\mathbf{f}^T)S^T+(-\tau\mathbf{b}\bar{\mathbf{k}}^T)U^{(2)T}A\\&+(-\tau\mathbf{b}\mathbf{d}_1^T-\tau\mathbf{b}\bar{\mathbf{d}}^T) + \end{aligned} +\end{equation} + +(ii) By writing $\mathbf{y}^TUQ=\mathbf{c}_1^T$, $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, and $U=\mathbf{e}_1\bar{\mathbf{u}}_1^T+U^{(2)}$ where $\bar{\mathbf{u}}_1=(u_1^{(1)},...,u_1^{(r)})\in \mathbb{R}^r$, we get + +\begin{equation} + \begin{aligned} + (I-\tau\mathbf{y}\mathbf{y}^T)UQS^T=\mathbf{e}_1(\bar{\mathbf{u}}_1^TQ-\tau\mathbf{c}_1^T)S^T+U^{(2)}(Q-\tau\bar{\mathbf{k}}\mathbf{c}_1^T)S^T+(-\tau\mathbf{b}\mathbf{c}_1^T)S^T + \end{aligned} +\end{equation} + +(iii) By writing $\mathbf{y}^TUK=\mathbf{c}_2^T$, $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, $U=\mathbf{e}_1\bar{\mathbf{u}}_1^T+U^{(2)}$, and $\mathbf{e}_1^TA=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS^T$, we get + +\begin{equation} + \begin{aligned} + &(I-\tau\mathbf{y}\mathbf{y}^T)UKU^TA\\=&\mathbf{e}_1(\bar{\mathbf{u}}_1^TK\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)+\mathbf{e}_1(\bar{\mathbf{u}}_1^TK\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T\\&+\mathbf{e}_1(\bar{\mathbf{u}}_1^TK-\tau\mathbf{c}_2^T)U^{(2)T}A + +U^{(2)}(K\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{k}}\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T\\&+U^{(2)}(K-\tau\bar{\mathbf{k}}\mathbf{c}_2^T)U^{(2)T}A+U^{(2)}(K\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\bar{\mathbf{k}}\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)\\&+(-\tau\mathbf{b}\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T+(-\tau\mathbf{b}\mathbf{c}_2^T)U^{(2)T}A+(-\tau\mathbf{b}\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T) + \end{aligned} +\end{equation} + +(iv) By writing $\mathbf{y}^TU=\mathbf{c}_3^T$, $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, and $U=\mathbf{e}_1\bar{\mathbf{u}}_1^T+U^{(2)}$, we get + +\begin{equation} + \begin{aligned} + (I-\tau\mathbf{y}\mathbf{y}^T)UE=\mathbf{e}_1(\bar{\mathbf{u}}_1^TE-\tau\mathbf{c}_3^TE)+U^{(2)}(E-\tau\bar{\mathbf{k}}\mathbf{c}_3^TE)+(-\tau\mathbf{b}\mathbf{c}_3^TE) + \end{aligned} +\end{equation} + +(v) By writing $\mathbf{y}^TX=\mathbf{c}_4^T$ and $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, we get + +\begin{equation} + \begin{aligned} + (I-\tau\mathbf{y}\mathbf{y}^T)XS^T=\mathbf{e}_1(-\tau\mathbf{c}_4^T)S^T+U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{c}_4^T)S^T+(X-\tau\mathbf{b}\mathbf{c}_4^T)S^T + \end{aligned} +\end{equation} + +(vi) By writing $\mathbf{y}^TY=\mathbf{c}_5^T$, $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, $U=\mathbf{e}_1\bar{\mathbf{u}}_1^T+U^{(2)}$, and $\mathbf{e}_1^TA=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS^T$, we get + +\begin{equation} + \begin{aligned} + (I-\tau\mathbf{y}\mathbf{y}^T)YU^TA=&\mathbf{e}_1(-\tau\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)+\mathbf{e}_1(-\tau\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T+\mathbf{e}_1(-\tau\mathbf{c}_5^T)U^{(2)T}A\\& + +U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T+U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{c}_5^T)U^{(2)T}A\\&+U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)+(Y\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\mathbf{b}\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T\\&+(Y-\tau\mathbf{b}\mathbf{c}_5^T)U^{(2)T}A+(Y\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{b}\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T) + \end{aligned} +\end{equation} + +(vii) By writing $\mathbf{y}^TZ=\mathbf{c}_6^T$ and $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, we get + +\begin{equation}\label{proof_last} +\begin{aligned} + (I-\tau\mathbf{y}\mathbf{y}^T)Z=\mathbf{e}_1(-\tau\mathbf{c}_6^T)+U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{c}_6^T)+(Z-\tau\mathbf{b}\mathbf{c}_6^T) +\end{aligned} +\end{equation} + +Combining Eq.(\ref{proof_first}) to (\ref{proof_last}), we obtain + +\textbf{firstly}, +\begin{equation}\label{submatrix_structure} +\tilde{C}[2:n,2:n]=\tilde{A}+\tilde{U}\tilde{Q}\tilde{S}^T+\tilde{U}\tilde{K}\tilde{U}^T\tilde{A}+\tilde{U}\tilde{E}+\tilde{X}\tilde{S}^T+\tilde{Y}\tilde{U}^T\tilde{A}+\tilde{Z} +\end{equation} + +where + +\begin{equation} + \tilde{A}:=A[2:n,2:n]; +\end{equation} + +\begin{equation} + \tilde{U}:=U[2:n,:]; +\end{equation} + +\begin{equation} + \tilde{S}:=S[2:n,:]; +\end{equation} + +\begin{equation} +\begin{aligned} + \tilde{Q}:=&-\tau\bar{\mathbf{k}}\bar{\mathbf{w}}_1^T-\tau \bar{\mathbf{k}}\mathbf{f}^T+Q-\tau\bar{\mathbf{k}}\mathbf{c}_1^T+K\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T\\& + -\tau\bar{\mathbf{k}}\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{k}}\mathbf{c}_4^T-\tau\bar{\mathbf{k}}\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T; + \end{aligned} +\end{equation} + +\begin{equation} + \tilde{K}:=-\tau\bar{\mathbf{k}}\bar{\mathbf{k}}^T+K-\tau\bar{\mathbf{k}}\mathbf{c}_2^T-\tau\bar{\mathbf{k}}\mathbf{c}_5^T; +\end{equation} + +\begin{equation} + \tilde{E}:=[\tilde{E}_s,\mathbf{0}] \in \mathbb{R}^{r\times (n-1)} +\end{equation} + +with + +\begin{equation} +\begin{aligned} + \tilde{E}_s:=&(-\tau\bar{\mathbf{k}}\mathbf{d}_1^T-\tau\bar{\mathbf{k}}\bar{\mathbf{d}}^T+K\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\bar{\mathbf{k}}\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T+E\\&-\tau\bar{\mathbf{k}}\mathbf{c}_3^TE-\tau\bar{\mathbf{k}}\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\bar{\mathbf{k}}\mathbf{c}_6^T)[:,2:\min(l+m+1,n)]; +\end{aligned} +\end{equation} + +\begin{equation} + \tilde{X}:=\begin{bmatrix} +\tilde{X}_s \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{(n-1)\times p} +\end{equation} + +with + +\begin{equation} +\begin{aligned} + \tilde{X}_s:=&(-\tau \mathbf{b}\bar{\mathbf{w}}_1^T-\tau \mathbf{b}\mathbf{f}^T-\tau \mathbf{b}\mathbf{c}_1^T-\tau \mathbf{b}\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T+X\\&-\tau\mathbf{b}\mathbf{c}_4^T+Y\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\mathbf{b}\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)[2:\min(l+1,n),:]; +\end{aligned} +\end{equation} + +\begin{equation} + \tilde{Y}:= \begin{bmatrix} +\tilde{Y}_s \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{(n-1)\times r} +\end{equation} + +with + +\begin{equation} + \tilde{Y}_s:=(-\tau \mathbf{b}\bar{\mathbf{k}}^T-\tau\mathbf{b}\mathbf{c}_2^T+Y-\tau\mathbf{b}\mathbf{c}_5^T)[2:\min(l+1,n),:]; +\end{equation} + +\begin{equation} + \tilde{Z}:= \begin{bmatrix} +\tilde{Z}_s & \mathbf{0} \\ \mathbf{0} & \mathbf{0} \end{bmatrix}\in \mathbb{R}^{(n-1) \times (n-1)} +\end{equation} + +with + +\begin{equation} +\begin{aligned} + \tilde{Z}_s:=&(-\tau\mathbf{b}\mathbf{d}_1^T-\tau\mathbf{b}\bar{\mathbf{d}}^T-\tau\mathbf{b}\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{b}\mathbf{c}_3^TE+Y\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{b}\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T\\&+Z-\tau\mathbf{b}\mathbf{c}_6^T)[2:\min(l+1,n),2:\min(l+m+1,n)]. +\end{aligned} +\end{equation} + +\textbf{Secondly}, + +\begin{equation} +\tilde{C}[1,2:n]=\hat{\mathbf{d}}^T+\hat{\boldsymbol{\alpha}}^T (S^T[:,2:n])+ \hat{\boldsymbol{\beta}}^T ((U^{(2)T}A)[:,2:n]) +\end{equation} + +where + +\begin{equation} + \hat{\mathbf{d}}:= \begin{bmatrix} +\hat{\mathbf{d}}_s \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{n-1} +\end{equation} + +with + +\begin{equation} +\begin{aligned} + \hat{\mathbf{d}}_s:=&(\mathbf{d}_1^T-\tau\mathbf{d}_1^T-\tau\bar{\mathbf{d}}^T+\bar{\mathbf{u}}_1^TK\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T+\bar{\mathbf{u}}_1^TE-\tau\mathbf{c}_3^TE\\&+\mathbf{y}^{(1)T}\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T+\mathbf{z}^{(1)T}-\tau\mathbf{c}_6^T)^T[2:\min(l+m+1,n)]; +\end{aligned} +\end{equation} + +\begin{equation} +\begin{aligned} + \hat{\boldsymbol{\alpha}}:=&(\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{w}}_1^T-\tau\mathbf{f}^T+\bar{\mathbf{u}}_1^TQ-\tau\mathbf{c}_1^T+\bar{\mathbf{u}}_1^TK\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T\\&-\tau\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T+\mathbf{x}^{(1)T}-\tau\mathbf{c}_4^T+\mathbf{y}^{(1)T}\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)^T\in \mathbb{R}^p; +\end{aligned} +\end{equation} + +and + +\begin{equation} + \hat{\boldsymbol{\beta}}:=(-\tau \bar{\mathbf{k}}^T+\bar{\mathbf{u}}_1^TK-\tau\mathbf{c}_2^T+\mathbf{y}^{(1)T}-\tau\mathbf{c}_5^T)^T\in \mathbb{R}^r. +\end{equation} + +From $U^{(2)}=U-\mathbf{e}_1\bar{\mathbf{u}}_1^T$ and $\mathbf{e}_1^TA=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS^T$, we have $U^{(2)T}A=U^TA-\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^TS^T-\bar{\mathbf{u}}_1\mathbf{d}_1^T$, so alternatively, we can write $\tilde{C}[1,2:n]$ as: + +\begin{equation}\label{row_structure} +\tilde{C}[1,2:n]=\tilde{\mathbf{d}}^T+\tilde{\boldsymbol{\alpha}}^T (S^T[:,2:n])+ \tilde{\boldsymbol{\beta}}^T ((U^TA)[:,2:n]) +\end{equation} + +where + +\begin{equation} + \tilde{\mathbf{d}}:= \begin{bmatrix} +\tilde{\mathbf{d}}_s \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{n-1} +\end{equation} + +with + +\begin{equation} +\begin{aligned} + \tilde{\mathbf{d}}_s:=&(\mathbf{d}_1^T-\tau\mathbf{d}_1^T-\tau\bar{\mathbf{d}}^T+\bar{\mathbf{u}}_1^TE-\tau\mathbf{c}_3^TE\\&+\mathbf{z}^{(1)T}-\tau\mathbf{c}_6^T+\tau\bar{\mathbf{k}}^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)^T[2:\min(l+m+1,n)]; +\end{aligned} +\end{equation} + +\begin{equation} +\begin{aligned} + \tilde{\boldsymbol{\alpha}}:=&(\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{w}}_1^T-\tau\mathbf{f}^T+\bar{\mathbf{u}}_1^TQ-\tau\mathbf{c}_1^T+\mathbf{x}^{(1)T}-\tau\mathbf{c}_4^T+\tau\bar{\mathbf{k}}^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)^T\in \mathbb{R}^p; +\end{aligned} +\end{equation} + +and + +\begin{equation} + \tilde{\boldsymbol{\beta}}:=(-\tau \bar{\mathbf{k}}^T+\bar{\mathbf{u}}_1^TK-\tau\mathbf{c}_2^T+\mathbf{y}^{(1)T}-\tau\mathbf{c}_5^T)^T\in \mathbb{R}^r. +\end{equation} + +We have proven lemma \ref{helpful_lemma} from Eq (\ref{submatrix_structure}) and (\ref{row_structure}). + +\end{proof} + + +With this lemma, we can now prove the following theorem. + + +\begin{theorem} +After applying QR decomposition to a banded plus semiseparable matrix $A$(which is expressed in Eq.(\ref{express_A})) with lower rank $r$, upper rank $p$, lower bandwidth $l$, and upper bandwidth $m$, the factor matrix $F$ obtained is also a banded plus semiseparable matrix. Its lower semiseparable part has rank $r$ and upper semiseparable part has rank $r+p$; its lower bandwidth is $l$ and upper bandwidth is $l+m$. +\end{theorem} + +\begin{proof} + +The QR decomposition can be done by performing a series of Householder Transformations(HT) to eliminate elements below the diagonals of $A$ in the first column, the second column, ..., up to the $n-1$th column in order. + +First, we perform an HT to eliminate the first column, that is, computing $(I-\tau_1 \mathbf{y}_1 \mathbf{y}^T_1)A$, where $\mathbf{y}_1$ is a regularized reflection vector s.t. the first nonzero element in $\mathbf{y}_1$ is $1$, and $\tau_1$ is a coefficient found during the process of the first HT. It is easy to see that $\mathbf{y}_1$ can be represented as $\mathbf{y}_1=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}_2+\mathbf{b}_2$ where $\bar{\mathbf{k}}_2\in \mathbb{R}^r$, $\mathbf{b}_2=(0,b^{(2)}_2,b^{(2)}_3,...,b^{(2)}_{\min(l+1,n)},0,...,0)^T \in R^n$. Therefore, the first column of $F$ below the diagonal can be determined as +\begin{equation} +F[2:n, 1]=(U\bar{\mathbf{k}}_2+\mathbf{b}_2)[2:n]. +\end{equation} + +Define $A_i=A[i:n,i:n]\in \mathbb{R}^{n+1-i,n+1-i}$, $U_i=U[i:n,:]\in \mathbb{R}^{(n+1-i)\times r}$, $V_i=V[i:n,:]\in \mathbb{R}^{(n+1-i)\times r}$, $S_i=S[i:n,:]\in \mathbb{R}^{(n+1-i)\times p}$, and $W_i=W[i:n,:]\in \mathbb{R}^{(n+1-i)\times p}$; $\bar{\mathbf{u}}_i:=(u_i^{(1)},...,u_i^{(r)})^T\in \mathbb{R}^r$ and $\bar{\mathbf{w}}_i=(w_i^{(1)},...,w_i^{(p)})^T\in \mathbb{R}^p$ for $i=1,...,n$. Let $A^{(i)}$ be matrix $A$ after the $i$th HT, we have $A^{(1)}:=(I-\tau_1 \mathbf{y}_1 \mathbf{y}^T_1)A$ now. Also let $\bar{S}:=U^TA\in \mathbb{R}^{r\times n}$, apply Eq.(\ref{submatrix_structure}) and (\ref{row_structure}) in lemma (\ref{helpful_lemma}) by setting $A=A_1$, $U=U_1$, $S=S_1$, $Q=\mathbf{0}$, $K=\mathbf{0}$, $E=\mathbf{0}$, $X=\mathbf{0}$, $Y=\mathbf{0}$, $Z=\mathbf{0}$, and compute $\mathbf{c}_1^{(1)}:=Q^TU_1^T\mathbf{y}_1=\mathbf{0}$, $\mathbf{c}_2^{(1)}:=K^TU_1^T\mathbf{y}_1=\mathbf{0}$, $\mathbf{c}_3^{(1)}:=U_1^T\mathbf{y}_1$, $\mathbf{c}_4^{(1)}:=X^T\mathbf{y}_1 =\mathbf{0}$, $\mathbf{c}_5^{(1)}:=Y^T\mathbf{y}_1=\mathbf{0}$ and +$\mathbf{c}_6^{(1)}:=Z^T\mathbf{y}_1=\mathbf{0}$, also write $\mathbf{e}_1^TA_1=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS_1^T$ where $\mathbf{d}_1=B[1,:]^T\in \mathbb{R}^n$, and $\mathbf{b}_2^TA_1=\bar{\mathbf{d}}_1^T+\mathbf{f}_1^TS_1^T$ where $\bar{\mathbf{d}}_1=(\bar{d}_1,...,\bar{d}_{\min(l+m+1,n)},0,...,0)^T\in \mathbb{R}^n$, $\mathbf{f}_1=W_1^T\mathbf{b}_2\in \mathbb{R}^{p}$, we have: + +i. + +\begin{equation} +A ^{(1)}[2:n,2:n]=A_2+U_2Q_2S_2^T+U_2K_2U_2^TA_2+U_2E_2+X_2S_2^T+Y_2U_2^TA_2+Z_2 +\end{equation} +with +\begin{equation} +\begin{aligned} + Q_2:=-\tau_1\bar{\mathbf{k}}_2\bar{\mathbf{w}}_1^T-\tau_1 \bar{\mathbf{k}}_2\mathbf{f}_1^T + \end{aligned} +\end{equation} + +\begin{equation} + K_2:=-\tau_1\bar{\mathbf{k}}_2\bar{\mathbf{k}}_2^T; +\end{equation} + +\begin{equation} + E_2:=[E_s^{(2)},\mathbf{0}] \in \mathbb{R}^{r\times (n-1)} +\end{equation} + +with + +\begin{equation} +\begin{aligned} + E_s^{(2)}:=&(-\tau_1\bar{\mathbf{k}}_2\mathbf{d}_1^T-\tau_1\bar{\mathbf{k}}_2\bar{\mathbf{d}}_1^T)[:,2:\min(l+m+1,n)]; +\end{aligned} +\end{equation} + +\begin{equation} + X_2:=\begin{bmatrix} +X_s^{(2)} \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{(n-1)\times p} +\end{equation} + +with + +\begin{equation} +\begin{aligned} + X_s^{(2)}:=&(-\tau_1 \mathbf{b}_2\bar{\mathbf{w}}_1^T-\tau_1 \mathbf{b}_2\mathbf{f}_1^T)[2:\min(l+1,n),:]; +\end{aligned} +\end{equation} + +\begin{equation} + Y_2:= \begin{bmatrix} +Y_s^{(2)} \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{(n-1)\times r} +\end{equation} + +with + +\begin{equation} + Y_s^{(2)}:=(-\tau_1 \mathbf{b}_2\bar{\mathbf{k}}_2^T)[2:\min(l+1,n),:]; +\end{equation} + +\begin{equation} + Z_2:= \begin{bmatrix} +Z_s^{(2)} & \mathbf{0} \\ \mathbf{0} & \mathbf{0} \end{bmatrix}\in \mathbb{R}^{(n-1) \times (n-1)} +\end{equation} + +with + +\begin{equation} +\begin{aligned} + Z_s^{(2)}:=&(-\tau_1\mathbf{b}_2\mathbf{d}_1^T-\tau_1\mathbf{b}_2\bar{\mathbf{d}}_1^T)[2:\min(l+1,n),2:\min(l+m+1,n)]. +\end{aligned} +\end{equation} + +ii. + +\begin{equation} +F[1,2:n]=A^{(1)}[1,2:n]=\tilde{\mathbf{d}}_2^T+(\boldsymbol{\alpha}_2^T S^T)[2:n]+ (\boldsymbol{\beta}_2^T \bar{S})[2:n] +\end{equation} + +where + +\begin{equation} + \tilde{\mathbf{d}}_2:= \begin{bmatrix} +\tilde{\mathbf{d}}_s^{(2)} \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{n-1} +\end{equation} + +with + +\begin{equation} +\begin{aligned} + \tilde{\mathbf{d}}_s^{(2)}:=(\mathbf{d}_1^T-\tau\mathbf{d}_1^T-\tau\bar{\mathbf{d}}^T+\tau\bar{\mathbf{k}}^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)^T[2:\min(l+m+1,n)]; +\end{aligned} +\end{equation} + +\begin{equation} +\begin{aligned} + \boldsymbol{\alpha}_2:=(\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{w}}_1^T-\tau\mathbf{f}^T+\tau\bar{\mathbf{k}}^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)^T\in \mathbb{R}^p; +\end{aligned} +\end{equation} + +and + +\begin{equation} + \boldsymbol{\beta}_2:=(-\tau \bar{\mathbf{k}}^T)^T\in \mathbb{R}^r. +\end{equation} + + +\textbf{Next is the induction part}: + +Suppose that after the $j$th HT($1\leq j k + A.upperfill[k,j] + A.bands[k,j] + elseif k > j + A.lowerfill[k,j] + A.bands[k,j] + else + A.bands[k,j] + end +end + + +""" +Represents factors matrix for QR for banded+semiseparable. we have + + for j = 0, we have A₀=tril(UV',-1) + B + triu(WS',1); + F = qrfactUnblocked!(A₀).factors # full QR factors; + As j increases, for BandedPlusSemiseparableQRPerturbedFactors A: + A[1:j,:] == F[1:j,:]; + A[:,1:j] == F[:,1:j]; + A[k, k] == F[k, k] for k < j; + A[j+1:end,j+1:end] == A₀[j+1:end,j+1:end] + U[j+1:end,:]*Q*S[j+1:end]' + U[j+1:end,:]*K*U[j+1:end]'*A₀[j+1:end,j+1:end] + + U[j+1:end,:]*[Eₛ 0] + [Xₛ;0]*S[j+1:end]' + [Yₛ;0]*U[j+1:end]'*A₀[j+1:end,j+1:end] + [Zₛ 0;0 0] +""" + +struct BandedPlusSemiseparableQRPerturbedFactors{T} <: LayoutMatrix{T} + n::Int # matrix size + r::Int # lower rank + p::Int # upper rank + l::Int # lower bandwidth + m::Int # upper bandwidth + U::Matrix{T} # n × r + V::Matrix{T} # n × r + W::Matrix{T} # n × (p+r) + S::Matrix{T} # n × (p+r) + B::BandedMatrix{T} # lower bandwidth l and upper bandwidth l + m + + Q::Matrix{T} # r × p + K::Matrix{T} # r × r + Eₛ::Matrix{T} # r × min(l+m,n) + Xₛ::Matrix{T} # min(l,n) × p + Yₛ::Matrix{T} # min(l,n) × r + Zₛ::Matrix{T} # min(l,n) × min(l+m,n) + + j::Base.RefValue{Int} # how many columns have been upper-triangulised +end + +size(A::BandedPlusSemiseparableQRPerturbedFactors) = (A.n,A.n) +axes(A::BandedPlusSemiseparableQRPerturbedFactors) = (1:A.n, 1:A.n) + +function BandedPlusSemiseparableQRPerturbedFactors(U,V,W,S,B) + n = size(U,1) + r = size(U,2) + p = size(W,2) + l, m = bandwidths(B) + A = tril(U*V',-1) + B + triu(W*S',1) + AᵀU = (fast_UᵀA(U, V, W, S, B, 1))' + BandedPlusSemiseparableQRPerturbedFactors(n,r,p,l,m,U,V,[W zeros(n,r)],[S AᵀU],BandedMatrix(B,(l,l+m)), + zeros(r,p),zeros(r,r),zeros(r,min(l+m,n)),zeros(min(l,n),p),zeros(min(l,n),r),zeros(min(l,n),min(l+m,n)),Ref(0)) +end + +function getindex(A::BandedPlusSemiseparableQRPerturbedFactors, k::Integer, i::Integer) + j = A.j[] + if k > j && i > j + AᵀU = (fast_UᵀA(A.U, A.V, A.W, A.S, A.B, j+1))' + UQ = A.U[k,:]' * A.Q + UK = A.U[k,:]' * A.K + + l = A.l[] + m = A.m[] + if k > i + if k <= j + l + A.U[k,:]' * A.V[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + A.Xₛ[k-j,:]' * A.S[i,1:A.p] + A.Yₛ[k-j,:]' * AᵀU[i-j,:] + A.Zₛ[k-j,i-j] + else + if i <= j + l + m + A.U[k,:]' * A.V[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + else + A.U[k,:]' * A.V[i,:] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.B[k,i] + end + end + elseif k < i + if k <= j + l + if i <= j + l + m + A.W[k,:]' * A.S[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + A.Xₛ[k-j,:]' * A.S[i,1:A.p] + A.Yₛ[k-j,:]' * AᵀU[i-j,:] + A.Zₛ[k-j,i-j] + else + A.W[k,:]' * A.S[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.Xₛ[k-j,:]' * A.S[i,1:A.p] + A.Yₛ[k-j,:]' * AᵀU[i-j,:] + end + else + if i <= j + l + m + A.W[k,:]' * A.S[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + else + A.W[k,:]' * A.S[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + end + end + else + if k <= j + l + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + A.Xₛ[k-j,:]' * A.S[i,1:A.p] + A.Yₛ[k-j,:]' * AᵀU[i-j,:] + A.Zₛ[k-j,i-j] + elseif i <= j + l + m + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + else + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + end + end + else + if k > i + A.U[k,:]' * A.V[i,:] + A.B[k,i] + elseif k < i + A.W[k,:]' * A.S[i,:] + A.B[k,i] + else + A.B[k,i] + end + end +end + +function fast_qr!(A) + n = A.n + τ = zeros(n) + if A.j[] != 0 + throw(Exception("Matrix has already been partially upper-triangularized")) + end + + UᵀU = UᵀU_lookup_table(A) + ūw̄_sum = ūw̄_sum_lookup_table(A) + d_extra = d_extra_lookup_table(A) + + for i in 1 : n-1 + onestep_qr!(A, τ, UᵀU, ūw̄_sum, d_extra) + end + + A.B[n,n] = A[n,n] + A.j[] += 1 + + τ +end + +function onestep_qr!(A, τ, UᵀU, ūw̄_sum, d_extra) + k̄, b, p_new, τ_current= compute_Householder_vector(A, UᵀU) # I- τyy' where y = eⱼ₊₁ + U⁽ʲ⁺²⁾k̄ + b and p_new will be the diagonal element on the current column + τ[A.j[]+1] = τ_current + w̄₁, ū₁, d₁, f, d̄ = compute_d₁_and_d̄(A, b) + c₁, c₂, c₃, c₄, c₅, c₆ = compute_variables_c(A, k̄, b, UᵀU) + + Q_prev = A.Q[:,:] + K_prev = A.K[:,:] + Eₛ_prev = A.Eₛ[:,:] + Xₛ_prev = A.Xₛ[:,:] + Yₛ_prev = A.Yₛ[:,:] + Zₛ_prev = A.Zₛ[:,:] + + update_next_submatrix!(A, k̄, b, τ_current, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆) + update_upper_triangular_part!(A, k̄, b, τ_current, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev, ūw̄_sum, d_extra) + update_lower_triangular_part!(A, p_new, k̄, b) + + A.j[] += 1 + +end + +# the following are auxiliary functions: + +function compute_Householder_vector(A, UᵀU) + # compute Householder transformation I- τyy' where y = eⱼ₊₁ + U⁽ʲ⁺²⁾k̄ + b + + # first express A[j+1:end,j+1] as p*eⱼ₊₁ + U⁽ʲ⁺²⁾k̄ + b + j = A.j[] + UᵀA_1 = (A.U[j+1:min(A.l+j+1,A.n),:])'*A.B[j+1:min(A.l+j+1,A.n),j+1] + UᵀU[j+2,:,:]*A.V[j+1,:] #the j+1th column of UᵀA + + k̄ = A.V[j+1,:] + A.Q * A.S[j+1,1:A.p] + A.K * UᵀA_1 + if A.l > 0 || A.m > 0 + k̄ += A.Eₛ[:,1] + end + b = A.B[j+2:min(A.l+j+1,A.n), j+1] + if A.l > 0 || A.m > 0 + b[1:min(A.l-1,length(b))] += (A.Xₛ * A.S[j+1,1:A.p] + A.Yₛ * UᵀA_1 + A.Zₛ[:,1])[2:min(A.l,length(b)+1)] + end + p = A.B[j+1,j+1] + A.U[j+1,:]'A.Q * A.S[j+1,1:A.p] + A.U[j+1,:]'A.K*UᵀA_1 + if A.l > 0 + p += A.U[j+1,:]'A.Eₛ[:,1] + A.Xₛ[1,:]'A.S[j+1,1:A.p] + A.Yₛ[1,:]'UᵀA_1 + A.Zₛ[1,1] + elseif A.m > 0 + p += A.U[j+1,:]'A.Eₛ[:,1] + end + + # compute the length square of A[j+1:end,j+1] + len_square = p^2 + k̄' * UᵀU[j+2,:,:] * k̄ + if A.l > 0 + len_square += k̄' * A.U[j+2:j+1+length(b),:]' * b + b' * A.U[j+2:j+1+length(b),:] * k̄ + b'b + end + + p_new = -sign(p) * sqrt(len_square) # the element on the diagonal after HT + k̄ = k̄ / (p - p_new) + b = b / (p - p_new) + τ_current = 2 * (p - p_new)^2 / ((p-p_new)^2 + (len_square - p^2)) # the value τ for the current HT + k̄, b, p_new, τ_current +end + +function compute_d₁_and_d̄(A, b) + j = A.j[] + w̄₁ = A.W[j+1,1:A.p] + ū₁ = A.U[j+1,:] + d₁ = A.B[j+1,j+1:min(j+1+A.m, A.n)] + d₁[1] = d₁[1] - w̄₁'*(A.S[j+1,1:A.p]) + f = (A.W[j+2:j+1+length(b),1:A.p])' * b + index1 = j+2:j+1+length(b) + index2 = j+1:min(j+1+A.m+A.l,A.n) + tril_sub = [ (ii - jj) >= 1 for ii in index1, jj in index2 ] + triu_sub = [ (jj - ii) >= 1 for ii in index1, jj in index2 ] + A₀ = A.U[index1,:]*A.V[index2,:]'.*tril_sub + A.B[index1,index2] + A.W[index1,1:A.p]*A.S[index2,1:A.p]'.*triu_sub + d̄ = (b'A₀ - f'*(A.S[index2,1:A.p])')' + w̄₁, ū₁, d₁, f, d̄ +end + +function compute_variables_c(A, k̄, b, UᵀU) + j = A.j[] + UᵀU⁽²⁾ = UᵀU[j+2,:,:] + + c₁ = (A.Q)' * A.U[j+1,:] + (A.Q)' * UᵀU⁽²⁾ * k̄ + (A.Q)' * (A.U[j+2:j+1+length(b),:])' * b + c₂ = (A.K)' * A.U[j+1,:] + (A.K)' * UᵀU⁽²⁾ * k̄ + (A.K)' * (A.U[j+2:j+1+length(b),:])' * b + c₃ = A.U[j+1,:] + UᵀU⁽²⁾ * k̄ + (A.U[j+2:j+1+length(b),:])' * b + + Xₛ_valid = A.Xₛ[2:min(A.l, A.n-j),:] + Yₛ_valid = A.Yₛ[2:min(A.l, A.n-j),:] + Zₛ_valid = A.Zₛ[2:min(A.l, A.n-j),1:min(A.l+A.m, A.n-j)] + c₄ = (Xₛ_valid)' * A.U[j+2:j+1+size(Xₛ_valid,1),:] * k̄ + (Xₛ_valid)' * b[1:size(Xₛ_valid,1)] + c₅ = (Yₛ_valid)' * A.U[j+2:j+1+size(Yₛ_valid,1),:] * k̄ + (Yₛ_valid)' * b[1:size(Yₛ_valid,1)] + c₆ = (Zₛ_valid)' * A.U[j+2:j+1+size(Zₛ_valid,1),:] * k̄ + (Zₛ_valid)' * b[1:size(Zₛ_valid,1)] + if A.l > 0 + c₄ += A.Xₛ[1,:] + c₅ += A.Yₛ[1,:] + c₆ += A.Zₛ[1,1:min(A.l+A.m, A.n-j)] + end + + c₁, c₂, c₃, c₄, c₅, c₆ +end + +function update_next_submatrix!(A, k̄, b, τ, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆) + Q_prev = A.Q[:,:] + K_prev = A.K[:,:] + Eₛ_prev = A.Eₛ[:,:] + Xₛ_prev = A.Xₛ[:,:] + Yₛ_prev = A.Yₛ[:,:] + Zₛ_prev = A.Zₛ[:,:] + + A.Q[:,:] = -τ*k̄*w̄₁' - τ*k̄*f' + Q_prev - τ*k̄*c₁' + K_prev*ū₁*w̄₁'- + τ*k̄*c₂'*ū₁*w̄₁' - τ*k̄*c₄' - τ*k̄*c₅'*ū₁*w̄₁' + A.K[:,:] = -τ*k̄*k̄' + K_prev - τ*k̄*c₂' - τ*k̄*c₅' + + A.Eₛ[:,1:length(d̄)-1] = -τ*k̄*(d̄[2:end])' + A.Eₛ[:,1:length(d₁)-1] += (-τ*k̄ + K_prev*ū₁ - τ*k̄*c₂'*ū₁ - τ*k̄*c₅'*ū₁)*(d₁[2:end])' + A.Eₛ[:,1:end-1] += Eₛ_prev[:,2:end] - τ*k̄*c₃'*Eₛ_prev[:,2:end] + A.Eₛ[:,1:length(c₆)-1] += -τ*k̄*(c₆[2:end])' + A.Eₛ[:,length(d̄):end] .= 0 + + A.Xₛ[1:length(b),:] = b*(-τ*w̄₁' - τ*f' - τ*c₁' - τ*c₂'*ū₁*w̄₁' - τ*c₄' - τ*c₅'*ū₁*w̄₁') + A.Xₛ[1:end-1,:] += Xₛ_prev[2:end,:] + Yₛ_prev[2:end,:]*ū₁*w̄₁' + A.Xₛ[length(b)+1:end,:] .= 0 + + A.Yₛ[1:length(b),:] = b*(-τ*k̄' - τ*c₂' - τ*c₅') + A.Yₛ[1:end-1,:] += Yₛ_prev[2:end,:] + A.Yₛ[length(b)+1:end,:] .= 0 + + A.Zₛ[1:length(b), 1:length(d̄)-1] = -τ*b*(d̄[2:end])' + A.Zₛ[1:length(b), 1:length(d₁)-1] += b*(-τ - τ*c₂'*ū₁ - τ*c₅'*ū₁)*(d₁[2:end])' + A.Zₛ[1:length(b), 1:end-1] += -τ*b*c₃'*Eₛ_prev[:,2:end] + A.Zₛ[1:end-1, 1:length(d₁)-1] += Yₛ_prev[2:end,:]*ū₁*(d₁[2:end])' + A.Zₛ[1:end-1, 1:end-1] += Zₛ_prev[2:end, 2:end] + A.Zₛ[1:length(b), 1:length(c₆)-1] += -τ*b*(c₆[2:end])' + A.Zₛ[length(b)+1:end,:] .= 0 + A.Zₛ[:,length(d̄):end] .= 0 + +end + +function update_upper_triangular_part!(A, k̄, b, τ, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q, K, Eₛ, Xₛ, Yₛ, Zₛ, ūw̄_sum, d_extra) + j = A.j[] + + β = (-τ*k̄' + ū₁'*K - τ*c₂' - τ*c₅')' + if size(Yₛ, 1) > 0 + β += Yₛ[1,:] + end + + α = (w̄₁' - τ*w̄₁' - τ*f' + ū₁'*Q - τ*c₁' - τ*c₄' + τ*k̄'*ū₁*w̄₁')' + if size(Xₛ, 1) > 0 + α += Xₛ[1,:] + end + α += (-β'*ūw̄_sum[j+1,:,:])' + + d = -τ*d̄[2:end] + d[1:length(d₁)-1] += (1 - τ + τ*k̄'*ū₁)*d₁[2:end] + d[1:min(length(d),size(Eₛ,2)-1)] += ((ū₁' - τ*c₃')*Eₛ[:,2:min(length(d)+1,size(Eₛ,2))])' + d[1:length(c₆)-1] += -τ*c₆[2:end] + if size(Zₛ, 1) > 0 + d[1:min(length(d),size(Zₛ,2)-1)] += Zₛ[1, 2:min(length(d)+1,size(Zₛ,2))] + end + d_extra_current = d_extra[j+1] + d[1:size(d_extra_current,2)] += (-β'*d_extra_current)' + + j = A.j[] + A.W[j+1, 1:A.p] = α + A.W[j+1, A.p+1:end] = β + A.B[j+1, j+2:j+1+length(d)] = d +end + +function update_lower_triangular_part!(A, p, k̄, b) + j = A.j[] + A.B[j+1, j+1] = p + A.B[j+2:j+1+length(b), j+1] = b + A.V[j+1, :] = k̄ +end + +function UᵀU_lookup_table(A) + UᵀU = zeros(A.n, A.r, A.r) + UᵀU_current = zeros(A.r, A.r) + for i in A.n:-1:1 + UᵀU_current += A.U[i,:] * (A.U[i,:])' + UᵀU[i,:,:] = UᵀU_current[:,:] + end + UᵀU +end + +function ūw̄_sum_lookup_table(A) + ūw̄_sum = zeros(A.n, A.r, A.p) + ūw̄_sum_current = zeros(A.r, A.p) + for t in 1:A.n + ūw̄_sum[t,:,:] = ūw̄_sum_current[:,:] + ūw̄_sum_current[:,:] += A.U[t,:] * (A.W[t,1:A.p])' + end + ūw̄_sum +end + +function d_extra_lookup_table(A) + d_extra = Vector{Any}() + for i in 1:A.n + d_extra_current = zeros(A.r, min(A.m, A.n-i)) + for t in max(1,i+1-A.m) : i-1 + d_extra_current += A.U[t,:] * (A.B[t, i+1:i+size(d_extra_current,2)])' + end + push!(d_extra, d_extra_current) + end + + d_extra +end + +function fast_UᵀA(U, V, W, S, B, j) + # compute U[j,end]ᵀ*A[j:end,j:end] where A = tril(UV',-1) + B + triu(WS',1) in O(n) + n = size(U,1) + r = size(U,2) + p = size(W,2) + l, m = bandwidths(B) + UᵀA = zeros(r, n+1-j) + UᵀU = (U[j:end,:])'*U[j:end,:] + UᵀW = zeros(r,p) + for i in j:n + UᵀU -= U[i,:] * (U[i,:])' + UᵀA[:,i+1-j] = UᵀU*V[i,:] + (U[max(j,i-m) : min(i+l,n),:])'*B[max(j,i-m) : min(i+l,n),i] + UᵀW*S[i,:] + UᵀW += U[i,:] * (W[i,:])' + end + UᵀA +end + +end \ No newline at end of file diff --git a/test/test_QR.jl b/test/test_QR.jl new file mode 100644 index 0000000..6360281 --- /dev/null +++ b/test/test_QR.jl @@ -0,0 +1,29 @@ +using BandedMatrices +using Test, LinearAlgebra +using Random +using BandedPlusSemiseparableMatrices +import .BandedPlusSemiseparableMatrices: BandedPlusSemiseparableMatrix, BandedPlusSemiseparableQRPerturbedFactors, fast_qr!, onestep_qr! +#Random.seed!(1234) + +n = 20 +l = 4 +m = 5 +r = 2 +p = 3 +B = brandn(n,n,l,m) +U = randn(n,r) +V = randn(n,r) +W = randn(n,p) +S = randn(n,p) +A = BandedPlusSemiseparableQRPerturbedFactors(U,V,W,S,B) + +F = qr(Matrix(A)).factors +Acopy = copy(Matrix(A)) +mm, nn = size(Acopy) +τ_true = Vector{eltype(Acopy)}(undef, min(mm,nn)) +LAPACK.geqrf!(Acopy, τ_true) + +τ = fast_qr!(A) + +@test Matrix(A) ≈ F +@test τ ≈ τ_true diff --git a/test/test_getindex.jl b/test/test_getindex.jl new file mode 100644 index 0000000..e013b82 --- /dev/null +++ b/test/test_getindex.jl @@ -0,0 +1,64 @@ +using BandedMatrices +using Test, LinearAlgebra +using Random +import .BandedPlusSemiseparableMatrices: BandedPlusSemiseparableMatrix, BandedPlusSemiseparableQRPerturbedFactors + +# test get index for BandedPlusSemiseparableQRPerturbedFactors + +n = 10 +l = 2 +m = 3 +r = 4 +p = 5 +B = brandn(n,n,l,m) +U = randn(n,r) +V = randn(n,r) +W = randn(n,p) +S = randn(n,p) +Q = randn(r,p) +K = randn(r,r) +Eₛ = randn(r,min(l+m,n)) +Xₛ = randn(min(l,n),p) +Yₛ = randn(min(l,n),r) +Zₛ = randn(min(l,n),min(l+m,n)) +A = BandedPlusSemiseparableQRPerturbedFactors(U,V,W,S,B) +A.Q[:,:] = Q +A.K[:,:] = K +A.Eₛ[:,:] = Eₛ +A.Xₛ[:,:] = Xₛ +A.Yₛ[:,:] = Yₛ +A.Zₛ[:,:] = Zₛ + +E = [Eₛ zeros(r,max(n-(l+m),0))] +X = [Xₛ; zeros(max(n - l, 0),p)] +Y = [Yₛ; zeros(max(n - l, 0),r)] +Z = [Zₛ zeros(min(l,n),max(n-(l+m),0)); zeros(max(n-l,0),min(l+m,n)) zeros(max(n-l,0), max(n-(l+m),0))] + +A₀ = tril(U*V',-1) + Matrix(B) + triu(W*S',1) + +AA = A₀ + U*Q*S' + U*K*U'*A₀ + U*E + X*S' + Y*U'*A₀ + Z +jj = 3 +E_j = [Eₛ zeros(r,max(n-jj+1-(l+m),0))] +X_j = [Xₛ; zeros(max(n -jj+1- l, 0),p)] +Y_j = [Yₛ; zeros(max(n -jj+1- l, 0),r)] +Z_j = [Zₛ zeros(min(l,n),max(n-jj+1-(l+m),0)); zeros(max(n-jj+1-l,0),min(l+m,n)) zeros(max(n-jj+1-l,0), max(n-jj+1-(l+m),0))] +AA2 = A₀[jj:end,jj:end] + U[jj:end,:]*Q*(S[jj:end,:])' + U[jj:end,:]*K*(U[jj:end,:])'*A₀[jj:end,jj:end]+ + U[jj:end,:]*E_j + X_j*(S[jj:end,:])' + Y_j*(U[jj:end,:])'*A₀[jj:end,jj:end] + Z_j + +@test Matrix(A) ≈ AA + +j = 2 +UᵀA = zeros(r, n+1-j) +UᵀU = (U[j:end,:])'*U[j:end,:] +UᵀW = zeros(r,p) +for i in j:n + UᵀU[:,:] -= U[i,:] * (U[i,:])' + UᵀA[:,i+1-j] = UᵀU*V[i,:] + (U[max(j,i-m) : min(i+l,n),:])'*B[max(j,i-m) : min(i+l,n),i] + UᵀW*S[i,1:p] + UᵀW[:,:] += U[i,:] * (W[i,1:p])' +end + +A₀ = tril(U*V',-1) + Matrix(B) + triu(W*S',1) +UᵀA_true = (U[j:end,:])'*A₀[j:end,j:end] + +UᵀA - UᵀA_true + From bea0dcf24173e93df442fafa05b7934b8fc1e3f4 Mon Sep 17 00:00:00 2001 From: TaoChenImperial Date: Mon, 10 Nov 2025 16:27:21 +0000 Subject: [PATCH 02/19] update test_getindex.jl --- test/test_getindex.jl | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/test/test_getindex.jl b/test/test_getindex.jl index e013b82..e5c97a3 100644 --- a/test/test_getindex.jl +++ b/test/test_getindex.jl @@ -35,30 +35,5 @@ Y = [Yₛ; zeros(max(n - l, 0),r)] Z = [Zₛ zeros(min(l,n),max(n-(l+m),0)); zeros(max(n-l,0),min(l+m,n)) zeros(max(n-l,0), max(n-(l+m),0))] A₀ = tril(U*V',-1) + Matrix(B) + triu(W*S',1) - AA = A₀ + U*Q*S' + U*K*U'*A₀ + U*E + X*S' + Y*U'*A₀ + Z -jj = 3 -E_j = [Eₛ zeros(r,max(n-jj+1-(l+m),0))] -X_j = [Xₛ; zeros(max(n -jj+1- l, 0),p)] -Y_j = [Yₛ; zeros(max(n -jj+1- l, 0),r)] -Z_j = [Zₛ zeros(min(l,n),max(n-jj+1-(l+m),0)); zeros(max(n-jj+1-l,0),min(l+m,n)) zeros(max(n-jj+1-l,0), max(n-jj+1-(l+m),0))] -AA2 = A₀[jj:end,jj:end] + U[jj:end,:]*Q*(S[jj:end,:])' + U[jj:end,:]*K*(U[jj:end,:])'*A₀[jj:end,jj:end]+ - U[jj:end,:]*E_j + X_j*(S[jj:end,:])' + Y_j*(U[jj:end,:])'*A₀[jj:end,jj:end] + Z_j - @test Matrix(A) ≈ AA - -j = 2 -UᵀA = zeros(r, n+1-j) -UᵀU = (U[j:end,:])'*U[j:end,:] -UᵀW = zeros(r,p) -for i in j:n - UᵀU[:,:] -= U[i,:] * (U[i,:])' - UᵀA[:,i+1-j] = UᵀU*V[i,:] + (U[max(j,i-m) : min(i+l,n),:])'*B[max(j,i-m) : min(i+l,n),i] + UᵀW*S[i,1:p] - UᵀW[:,:] += U[i,:] * (W[i,1:p])' -end - -A₀ = tril(U*V',-1) + Matrix(B) + triu(W*S',1) -UᵀA_true = (U[j:end,:])'*A₀[j:end,j:end] - -UᵀA - UᵀA_true - From 4deb7e6d6f6c2cf8293dc504283ada06c9e4ed7b Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Tue, 11 Nov 2025 17:20:34 +0000 Subject: [PATCH 03/19] Delete main.tex --- paper/main.tex | 665 ------------------------------------------------- 1 file changed, 665 deletions(-) delete mode 100644 paper/main.tex diff --git a/paper/main.tex b/paper/main.tex deleted file mode 100644 index 851b6d8..0000000 --- a/paper/main.tex +++ /dev/null @@ -1,665 +0,0 @@ -\documentclass{article} -\usepackage{graphicx} % Required for inserting images -\usepackage{amsmath} -\usepackage{amssymb} -\usepackage{amsthm} -\usepackage{algorithm} -\usepackage{algpseudocode} -\newtheorem{theorem}{Theorem} -\newtheorem{lemma}{Lemma} -\newtheorem{definition}{Definition} -\title{A fast QR decomposition for banded plus semiseparable matrices} -\author{Tao Chen} - - -\begin{document} - -\maketitle - -Consider an $n$ by $n$ banded plus semiseparable matrix $A$ with rank $r$ in its lower triangular part and rank $p$ in its upper triangular part, meanwhile, the banded part has lower bandwidth $l$ and upper bandwidth $m$. - -$A$ can be formulated as - -\begin{equation} -A:=tril(UV^T, -1) + B + triu(WS^T, 1)\label{express_A} -\end{equation} - -where $U \in \mathbb{R}^{n\times r}$, $V\in \mathbb{R}^{n\times r}$, $W\in \mathbb{R}^{n\times p}$, and $S\in \mathbb{R}^{n \times p}$. Also, $B$ is a banded matrix with lower bandwidth $l$ and upper bandwidth $m$, which can be represented as $B:=(b_{ij})_{i,j=1}^n\in \mathbb{R}^{n,n}$ with $b_{ij}=0$ if $i-j>l$ or $j-i>m$. - -If we define $\bar{\mathbf{u}}_i:=U[i,:]^T\in \mathbb{R}^r$, $\bar{\mathbf{v}}_i:=V[i,:]^T\in \mathbb{R}^r$, $\bar{\mathbf{w}}_i:=W[i,:]^T\in \mathbb{R}^p$, and $\bar{\mathbf{s}}_i:=S[i,:]^T\in \mathbb{R}^p$ for $i=1,...,n$, we have: -\begin{equation} -A = -\begin{bmatrix} -b_{11} & \bar{\mathbf{w}}_1^T \bar{\mathbf{s}}_2 + b_{12} & \bar{\mathbf{w}}_1^T \bar{\mathbf{s}}_3 + b_{13} & \cdots & \bar{\mathbf{w}}_1^T \bar{\mathbf{s}}_n + b_{1n} \\ -\bar{\mathbf{u}}_2^T \bar{\mathbf{v}}_1 + b_{21} & b_{22} & \bar{\mathbf{w}}_2^T \bar{\mathbf{s}}_3 + b_{23} & \cdots & \bar{\mathbf{w}}_2^T \bar{\mathbf{s}}_n + b_{2n} \\ -\bar{\mathbf{u}}_3^T \bar{\mathbf{v}}_1 + b_{31} & \bar{\mathbf{u}}_3^T \bar{\mathbf{v}}_2 + b_{32} & b_{33} & \cdots & \bar{\mathbf{w}}_3^T \bar{\mathbf{s}}_n + b_{3n} \\ -\vdots & \vdots & \vdots & \ddots & \vdots \\ -\bar{\mathbf{u}}_n^T \bar{\mathbf{v}}_1 + b_{n1} & \bar{\mathbf{u}}_n^T \bar{\mathbf{v}}_2 + b_{n2} & \bar{\mathbf{u}}_n^T \bar{\mathbf{v}}_3 + b_{n3} & \cdots & b_{nn} -\end{bmatrix} -\end{equation} - -After applying QR decomposition to $A$, the resulting factor matrix $F$, in which the upper triangular portion stores the matrix $R$, and the lower triangular portion contains the Householder reflection vectors $\mathbf{y}$ generated during the decomposition processretain, is again a banded plus semiseparable structure. Specifically, the lower semiseparable part has rank $r$, the upper semiseparable part has rank $r+p$, the lower bandwidth is $l$, and the upper bandwidth is $l+m$. - -Next, we demonstrate by induction why the factor matrix $F$ mantains such a banded plus semiseparable structure. - -Before we start, an important notation will be: for a matrix $S$, let $S[i:j,m:n]$ represent the submatrix of $S$ from row $i$ to row $j$ and from column $m$ to column $n$. When $i=j$ or $m=n$, the notation will be simplified as $S[i,m:n]$ or $S[i:j,m]$. - -Let's first introduce a definition and two very helpful lemmas: - -\begin{definition}[Householder-modified banded-plus-semiseparable matrix] - Given an $n\times n$ banded-plus-semiseparable matrix(bpsm) $A=tril(UV^T, -1) + B + triu(WS^T, 1)$ where $U\in \mathbb{R}^{n\times r}$, $V\in \mathbb{R}^{n\times r}$, $W \in \mathbb{R}^{n\times p}$, $S \in \mathbb{R}^{n\times r}$, and $B$ a banded matrix with lower bandwidth $l$ and upper bandwidth $m$, a matrix $C$ is called a Householder-modified banded-plus-semiseparable matrix(hmbpsm) to $A$ if - - \begin{equation} - C=A+UQS^T+UKU^TA+UE+XS^T+YU^TA+Z - \end{equation} - for some $Q\in \mathbb{R}^{r\times p}$, $K\in \mathbb{R}^{r\times r}$, $E=[E_s,\mathbf{0}]\in \mathbb{R}^{r \times n}$ with $E_s\in \mathbb{R}^{r\times \min(l+m,n)}$; $X=\begin{bmatrix} -X_s \\ \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n\times p}$ with $X_s\in \mathbb{R}^{\min(l,n)\times p}$; $Y=\begin{bmatrix} -Y_s \\ \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n \times r}$ with $Y_s\in \mathbb{R}^{\min(l,n)\times r}$; and $Z=\begin{bmatrix} -Z_s & \mathbf{0} \\ \mathbf{0} & \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n \times n}$ with $Z_s \in \mathbb{R}^{\min(l,n)\times \min(l+m,n)}$. -\end{definition} - -\begin{definition}[Householder-modified banded-plus-semiseparable vector] - -Let $A \in \mathbb{R}^{n \times n}$ be a banded-plus-semiseparable matrix (bpsm) as defined previously. A vector $\mathbf{c}$ of length $n-1$ is called a Householder-modified banded-plus-semiseparable vector(hmbpsv) to $A$ if - \begin{equation} - \mathbf{c}^T=\mathbf{d}^T+\boldsymbol{\alpha}^T (S^T[:,2:n])+ \boldsymbol{\beta}^T ((U^TA)[:,2:n]) - \end{equation} -for some $\mathbf{d}=\begin{bmatrix} -\mathbf{d}_s \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{n-1}$ with $\mathbf{d}_s\in \mathbb{R}^{\min(l+m,n)}$, $\boldsymbol{\alpha}\in \mathbb{R}^p$, and $\boldsymbol{\beta}\in \mathbb{R}^r$. -\end{definition} - -\begin{lemma} \label{helpful_lemma} - -Given a bpsm $A$ and its hmbpsm $C$, Suppose a Householder transformation is applied to $C$ to eliminate -the subdiagonal entries of its first column, and denote the resulting matrix by -$\tilde{C}$. Then the following hold: -\begin{enumerate} - \item The submatrix $\tilde{C}[2:n,\,2:n]$ - is an hmbpsm to $A[2:n,\,2:n]$. - \item - $\tilde{C}[1,\,2:n]$ is an hmbpsv to $A$. -\end{enumerate} -\end{lemma} - - -\begin{proof} - -Let's first introduce some notations: - -$A:=tril(UV^T, -1) + B + triu(WS^T, 1)$ where $U=(\mathbf{u}_1,...,\mathbf{u}_r) \in \mathbb{R}^{n\times r}$, $V=(\mathbf{v}_1,...,\mathbf{v}_r)\in \mathbb{R}^{n\times r}$, $W=(\mathbf{w}_1,...,\mathbf{w}_p)\in \mathbb{R}^{n\times p}$, and $S=(\mathbf{s}_1,...,\mathbf{s}_p)\in \mathbb{R}^{n \times p}$. Here $\mathbf{u}_i=(u_1^{(i)},...,u_n^{(i)})^T\in \mathbb{R}^n$ and $\mathbf{v}_i=(v_1^{(i)},...,v_n^{(i)})^T\in \mathbb{R}^n$ for $i=1,...,r$; $\mathbf{w}_i=(w_1^{(i)},...,w_n^{(i)})^T\in \mathbb{R}^n$ and $\mathbf{s}_i=(s_1^{(i)},...,s_n^{(i)})^T\in \mathbb{R}^n$ for $i=1,...,p$. Also, $B=(b_{ij})_{i,j=1}^n\in \mathbb{R}^{n,n}$ with $b_{ij}=0$ if $i-j>l$ or $j-i>m$. - -$C:=A+UQS^T+UKU^TA+UE+XS^T+YU^TA+Z$ where $Q\in \mathbb{R}^{r\times p}$ and $K\in \mathbb{R}^{r\times r}$; $E=[E_s,\mathbf{0}]\in \mathbb{R}^{r \times n}$ with $E_s\in \mathbb{R}^{r\times\min(l+m,n)}$; $X=\begin{bmatrix} -X_s \\ \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n\times p}$ with $X_s\in \mathbb{R}^{\min(l,n)\times p}$; $Y=\begin{bmatrix} -Y_s \\ \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n \times r}$ with $Y_s\in \mathbb{R}^{\min(l,n)\times r}$; $Z=\begin{bmatrix} -Z_s & \mathbf{0} \\ \mathbf{0} & \mathbf{0} \end{bmatrix}\in \mathbb{R}^{n \times n}$ with $Z_s \in \mathbb{R}^{\min(l,n)\times\min(l+m,n)}$. - -$\tilde{C}:=(I-\tau\mathbf{y}\mathbf{y}^T)C$ where $I-\tau\mathbf{y}\mathbf{y}^T$ is a Householder transformation for eliminating the first column of $C$(below the diagonal). Here $\tau$ is a coefficient. From the expression of $C$, it is easy to see that $\mathbf{y}$ can be expressed as $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$ with $\mathbf{e}_1=(1,0,...,0)^T\in \mathbb{R}^n$, $U^{(2)}\in \mathbb{R}^{n\times r}$ s.t. $U^{(2)}[1,:]=0$ and $U^{(2)}[2:n,:]=U[2:n,:]$, $\bar{\mathbf{k}}=(\bar{k}_1,...,\bar{k}_r)^T\in \mathbb{R}^r$, and $\mathbf{b}=(0,b_2,...,b_{min(l+1,n)},0,...,0)^T\in \mathbb{R}^n$. - -Let $\bar{\mathbf{u}}_1:=(u_1^{(1)},...,u_1^{(r)})^T\in \mathbb{R}^r$; write $$\mathbf{e}_1^TA=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS^T$$ where $\mathbf{d}_1=B[1,:]^T\in \mathbb{R}^n$, $\bar{\mathbf{w}}_1=(w_1^{(1)},...,w_1^{(p)})^T\in \mathbb{R}^p$, - -and $$\mathbf{b}^TA=\bar{\mathbf{d}}^T+\mathbf{f}^TS^T$$ where $\bar{\mathbf{d}}=(\bar{d}_1,...,\bar{d}_{\min(l+m+1,n)},0,...,0)^T\in \mathbb{R}^n$, $\mathbf{f}=W^T\mathbf{b}\in \mathbb{R}^{p}$. - -Next, define the following $6$ variables: $\mathbf{c}_1:=Q^TU^T\mathbf{y} \in \mathbb{R}^p$, $\mathbf{c}_2:=K^TU^T\mathbf{y} \in \mathbb{R}^r$, $\mathbf{c}_3:=U^T\mathbf{y} \in \mathbb{R}^r$, $\mathbf{c}_4:=X^T\mathbf{y} \in \mathbb{R}^p$, $\mathbf{c}_5:=Y^T\mathbf{y} \in \mathbb{R}^r$ and -$\mathbf{c}_6:=Z^T\mathbf{y}\in \mathbb{R}^{n}$. - -It is easy to see that $\mathbf{x}$ can be written as $\mathbf{c}_6=\begin{bmatrix} -\mathbf{c}_{6s} \\ \mathbf{0} \end{bmatrix}$ with $\mathbf{c}_{6s}\in \mathbb{R}^{\min(l+m,n)}$. - -At last, let $\mathbf{x}^{(1)}:=X[1,:]^T\in \mathbb{R}^p$, $\mathbf{y}^{(1)}:=Y[1,:]^T\in \mathbb{R}^r$, and $\mathbf{z}^{(1)}:=Z[1,:]^T\in \mathbb{R}^n$. - -To compute $(I-\tau\mathbf{y}\mathbf{y}^T)C$ where $C:=A+UQS^T+UKU^TA+UE+XS^T+YU^TA+Z$, we compute $(I-\tau\mathbf{y}\mathbf{y}^T)A$, $(I-\tau\mathbf{y}\mathbf{y}^T)UQS^T$, $(I-\tau\mathbf{y}\mathbf{y}^T)UKU^TA$, $(I-\tau\mathbf{y}\mathbf{y}^T)UE$, $(I-\tau\mathbf{y}\mathbf{y}^T)XS^T$, $(I-\tau\mathbf{y}\mathbf{y}^T)YU^TA$, and $(I-\tau\mathbf{y}\mathbf{y}^T)Z$ separately. - -(i) By writing $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, $\mathbf{e}_1^TA=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS^T$, and $\mathbf{b}^TA=\bar{\mathbf{d}}^T+\mathbf{f}^TS^T$, we get - -\begin{equation}\label{proof_first} - \begin{aligned} - (I-\tau\mathbf{y}\mathbf{y}^T)A=&A+\mathbf{e_1}(-\tau\mathbf{d}_1^T-\tau\bar{\mathbf{d}}^T)+\mathbf{e_1}(-\tau\bar{\mathbf{w}}_1^T-\tau\mathbf{f}^T)S^T\\&+\mathbf{e}_1(-\tau\bar{\mathbf{k}}^T)U^{(2)T}A+U^{(2)}(-\tau\bar{\mathbf{k}}\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{k}}\mathbf{f}^T)S^T\\& - +U^{(2)}(-\tau\bar{\mathbf{k}}\bar{\mathbf{k}}^T)U^{(2)T}A+U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{d}_1^T-\tau\bar{\mathbf{k}}\bar{\mathbf{d}}^T)\\& - +(-\tau \mathbf{b}\bar{\mathbf{w}}_1^T-\tau\mathbf{b}\mathbf{f}^T)S^T+(-\tau\mathbf{b}\bar{\mathbf{k}}^T)U^{(2)T}A\\&+(-\tau\mathbf{b}\mathbf{d}_1^T-\tau\mathbf{b}\bar{\mathbf{d}}^T) - \end{aligned} -\end{equation} - -(ii) By writing $\mathbf{y}^TUQ=\mathbf{c}_1^T$, $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, and $U=\mathbf{e}_1\bar{\mathbf{u}}_1^T+U^{(2)}$ where $\bar{\mathbf{u}}_1=(u_1^{(1)},...,u_1^{(r)})\in \mathbb{R}^r$, we get - -\begin{equation} - \begin{aligned} - (I-\tau\mathbf{y}\mathbf{y}^T)UQS^T=\mathbf{e}_1(\bar{\mathbf{u}}_1^TQ-\tau\mathbf{c}_1^T)S^T+U^{(2)}(Q-\tau\bar{\mathbf{k}}\mathbf{c}_1^T)S^T+(-\tau\mathbf{b}\mathbf{c}_1^T)S^T - \end{aligned} -\end{equation} - -(iii) By writing $\mathbf{y}^TUK=\mathbf{c}_2^T$, $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, $U=\mathbf{e}_1\bar{\mathbf{u}}_1^T+U^{(2)}$, and $\mathbf{e}_1^TA=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS^T$, we get - -\begin{equation} - \begin{aligned} - &(I-\tau\mathbf{y}\mathbf{y}^T)UKU^TA\\=&\mathbf{e}_1(\bar{\mathbf{u}}_1^TK\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)+\mathbf{e}_1(\bar{\mathbf{u}}_1^TK\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T\\&+\mathbf{e}_1(\bar{\mathbf{u}}_1^TK-\tau\mathbf{c}_2^T)U^{(2)T}A - +U^{(2)}(K\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{k}}\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T\\&+U^{(2)}(K-\tau\bar{\mathbf{k}}\mathbf{c}_2^T)U^{(2)T}A+U^{(2)}(K\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\bar{\mathbf{k}}\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)\\&+(-\tau\mathbf{b}\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T+(-\tau\mathbf{b}\mathbf{c}_2^T)U^{(2)T}A+(-\tau\mathbf{b}\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T) - \end{aligned} -\end{equation} - -(iv) By writing $\mathbf{y}^TU=\mathbf{c}_3^T$, $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, and $U=\mathbf{e}_1\bar{\mathbf{u}}_1^T+U^{(2)}$, we get - -\begin{equation} - \begin{aligned} - (I-\tau\mathbf{y}\mathbf{y}^T)UE=\mathbf{e}_1(\bar{\mathbf{u}}_1^TE-\tau\mathbf{c}_3^TE)+U^{(2)}(E-\tau\bar{\mathbf{k}}\mathbf{c}_3^TE)+(-\tau\mathbf{b}\mathbf{c}_3^TE) - \end{aligned} -\end{equation} - -(v) By writing $\mathbf{y}^TX=\mathbf{c}_4^T$ and $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, we get - -\begin{equation} - \begin{aligned} - (I-\tau\mathbf{y}\mathbf{y}^T)XS^T=\mathbf{e}_1(-\tau\mathbf{c}_4^T)S^T+U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{c}_4^T)S^T+(X-\tau\mathbf{b}\mathbf{c}_4^T)S^T - \end{aligned} -\end{equation} - -(vi) By writing $\mathbf{y}^TY=\mathbf{c}_5^T$, $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, $U=\mathbf{e}_1\bar{\mathbf{u}}_1^T+U^{(2)}$, and $\mathbf{e}_1^TA=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS^T$, we get - -\begin{equation} - \begin{aligned} - (I-\tau\mathbf{y}\mathbf{y}^T)YU^TA=&\mathbf{e}_1(-\tau\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)+\mathbf{e}_1(-\tau\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T+\mathbf{e}_1(-\tau\mathbf{c}_5^T)U^{(2)T}A\\& - +U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T+U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{c}_5^T)U^{(2)T}A\\&+U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)+(Y\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\mathbf{b}\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)S^T\\&+(Y-\tau\mathbf{b}\mathbf{c}_5^T)U^{(2)T}A+(Y\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{b}\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T) - \end{aligned} -\end{equation} - -(vii) By writing $\mathbf{y}^TZ=\mathbf{c}_6^T$ and $\mathbf{y}=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}+\mathbf{b}$, we get - -\begin{equation}\label{proof_last} -\begin{aligned} - (I-\tau\mathbf{y}\mathbf{y}^T)Z=\mathbf{e}_1(-\tau\mathbf{c}_6^T)+U^{(2)}(-\tau\bar{\mathbf{k}}\mathbf{c}_6^T)+(Z-\tau\mathbf{b}\mathbf{c}_6^T) -\end{aligned} -\end{equation} - -Combining Eq.(\ref{proof_first}) to (\ref{proof_last}), we obtain - -\textbf{firstly}, -\begin{equation}\label{submatrix_structure} -\tilde{C}[2:n,2:n]=\tilde{A}+\tilde{U}\tilde{Q}\tilde{S}^T+\tilde{U}\tilde{K}\tilde{U}^T\tilde{A}+\tilde{U}\tilde{E}+\tilde{X}\tilde{S}^T+\tilde{Y}\tilde{U}^T\tilde{A}+\tilde{Z} -\end{equation} - -where - -\begin{equation} - \tilde{A}:=A[2:n,2:n]; -\end{equation} - -\begin{equation} - \tilde{U}:=U[2:n,:]; -\end{equation} - -\begin{equation} - \tilde{S}:=S[2:n,:]; -\end{equation} - -\begin{equation} -\begin{aligned} - \tilde{Q}:=&-\tau\bar{\mathbf{k}}\bar{\mathbf{w}}_1^T-\tau \bar{\mathbf{k}}\mathbf{f}^T+Q-\tau\bar{\mathbf{k}}\mathbf{c}_1^T+K\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T\\& - -\tau\bar{\mathbf{k}}\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{k}}\mathbf{c}_4^T-\tau\bar{\mathbf{k}}\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T; - \end{aligned} -\end{equation} - -\begin{equation} - \tilde{K}:=-\tau\bar{\mathbf{k}}\bar{\mathbf{k}}^T+K-\tau\bar{\mathbf{k}}\mathbf{c}_2^T-\tau\bar{\mathbf{k}}\mathbf{c}_5^T; -\end{equation} - -\begin{equation} - \tilde{E}:=[\tilde{E}_s,\mathbf{0}] \in \mathbb{R}^{r\times (n-1)} -\end{equation} - -with - -\begin{equation} -\begin{aligned} - \tilde{E}_s:=&(-\tau\bar{\mathbf{k}}\mathbf{d}_1^T-\tau\bar{\mathbf{k}}\bar{\mathbf{d}}^T+K\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\bar{\mathbf{k}}\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T+E\\&-\tau\bar{\mathbf{k}}\mathbf{c}_3^TE-\tau\bar{\mathbf{k}}\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\bar{\mathbf{k}}\mathbf{c}_6^T)[:,2:\min(l+m+1,n)]; -\end{aligned} -\end{equation} - -\begin{equation} - \tilde{X}:=\begin{bmatrix} -\tilde{X}_s \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{(n-1)\times p} -\end{equation} - -with - -\begin{equation} -\begin{aligned} - \tilde{X}_s:=&(-\tau \mathbf{b}\bar{\mathbf{w}}_1^T-\tau \mathbf{b}\mathbf{f}^T-\tau \mathbf{b}\mathbf{c}_1^T-\tau \mathbf{b}\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T+X\\&-\tau\mathbf{b}\mathbf{c}_4^T+Y\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\mathbf{b}\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)[2:\min(l+1,n),:]; -\end{aligned} -\end{equation} - -\begin{equation} - \tilde{Y}:= \begin{bmatrix} -\tilde{Y}_s \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{(n-1)\times r} -\end{equation} - -with - -\begin{equation} - \tilde{Y}_s:=(-\tau \mathbf{b}\bar{\mathbf{k}}^T-\tau\mathbf{b}\mathbf{c}_2^T+Y-\tau\mathbf{b}\mathbf{c}_5^T)[2:\min(l+1,n),:]; -\end{equation} - -\begin{equation} - \tilde{Z}:= \begin{bmatrix} -\tilde{Z}_s & \mathbf{0} \\ \mathbf{0} & \mathbf{0} \end{bmatrix}\in \mathbb{R}^{(n-1) \times (n-1)} -\end{equation} - -with - -\begin{equation} -\begin{aligned} - \tilde{Z}_s:=&(-\tau\mathbf{b}\mathbf{d}_1^T-\tau\mathbf{b}\bar{\mathbf{d}}^T-\tau\mathbf{b}\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{b}\mathbf{c}_3^TE+Y\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{b}\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T\\&+Z-\tau\mathbf{b}\mathbf{c}_6^T)[2:\min(l+1,n),2:\min(l+m+1,n)]. -\end{aligned} -\end{equation} - -\textbf{Secondly}, - -\begin{equation} -\tilde{C}[1,2:n]=\hat{\mathbf{d}}^T+\hat{\boldsymbol{\alpha}}^T (S^T[:,2:n])+ \hat{\boldsymbol{\beta}}^T ((U^{(2)T}A)[:,2:n]) -\end{equation} - -where - -\begin{equation} - \hat{\mathbf{d}}:= \begin{bmatrix} -\hat{\mathbf{d}}_s \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{n-1} -\end{equation} - -with - -\begin{equation} -\begin{aligned} - \hat{\mathbf{d}}_s:=&(\mathbf{d}_1^T-\tau\mathbf{d}_1^T-\tau\bar{\mathbf{d}}^T+\bar{\mathbf{u}}_1^TK\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{c}_2^T\bar{\mathbf{u}}_1\mathbf{d}_1^T+\bar{\mathbf{u}}_1^TE-\tau\mathbf{c}_3^TE\\&+\mathbf{y}^{(1)T}\bar{\mathbf{u}}_1\mathbf{d}_1^T-\tau\mathbf{c}_5^T\bar{\mathbf{u}}_1\mathbf{d}_1^T+\mathbf{z}^{(1)T}-\tau\mathbf{c}_6^T)^T[2:\min(l+m+1,n)]; -\end{aligned} -\end{equation} - -\begin{equation} -\begin{aligned} - \hat{\boldsymbol{\alpha}}:=&(\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{w}}_1^T-\tau\mathbf{f}^T+\bar{\mathbf{u}}_1^TQ-\tau\mathbf{c}_1^T+\bar{\mathbf{u}}_1^TK\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T\\&-\tau\mathbf{c}_2^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T+\mathbf{x}^{(1)T}-\tau\mathbf{c}_4^T+\mathbf{y}^{(1)T}\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T-\tau\mathbf{c}_5^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)^T\in \mathbb{R}^p; -\end{aligned} -\end{equation} - -and - -\begin{equation} - \hat{\boldsymbol{\beta}}:=(-\tau \bar{\mathbf{k}}^T+\bar{\mathbf{u}}_1^TK-\tau\mathbf{c}_2^T+\mathbf{y}^{(1)T}-\tau\mathbf{c}_5^T)^T\in \mathbb{R}^r. -\end{equation} - -From $U^{(2)}=U-\mathbf{e}_1\bar{\mathbf{u}}_1^T$ and $\mathbf{e}_1^TA=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS^T$, we have $U^{(2)T}A=U^TA-\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^TS^T-\bar{\mathbf{u}}_1\mathbf{d}_1^T$, so alternatively, we can write $\tilde{C}[1,2:n]$ as: - -\begin{equation}\label{row_structure} -\tilde{C}[1,2:n]=\tilde{\mathbf{d}}^T+\tilde{\boldsymbol{\alpha}}^T (S^T[:,2:n])+ \tilde{\boldsymbol{\beta}}^T ((U^TA)[:,2:n]) -\end{equation} - -where - -\begin{equation} - \tilde{\mathbf{d}}:= \begin{bmatrix} -\tilde{\mathbf{d}}_s \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{n-1} -\end{equation} - -with - -\begin{equation} -\begin{aligned} - \tilde{\mathbf{d}}_s:=&(\mathbf{d}_1^T-\tau\mathbf{d}_1^T-\tau\bar{\mathbf{d}}^T+\bar{\mathbf{u}}_1^TE-\tau\mathbf{c}_3^TE\\&+\mathbf{z}^{(1)T}-\tau\mathbf{c}_6^T+\tau\bar{\mathbf{k}}^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)^T[2:\min(l+m+1,n)]; -\end{aligned} -\end{equation} - -\begin{equation} -\begin{aligned} - \tilde{\boldsymbol{\alpha}}:=&(\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{w}}_1^T-\tau\mathbf{f}^T+\bar{\mathbf{u}}_1^TQ-\tau\mathbf{c}_1^T+\mathbf{x}^{(1)T}-\tau\mathbf{c}_4^T+\tau\bar{\mathbf{k}}^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)^T\in \mathbb{R}^p; -\end{aligned} -\end{equation} - -and - -\begin{equation} - \tilde{\boldsymbol{\beta}}:=(-\tau \bar{\mathbf{k}}^T+\bar{\mathbf{u}}_1^TK-\tau\mathbf{c}_2^T+\mathbf{y}^{(1)T}-\tau\mathbf{c}_5^T)^T\in \mathbb{R}^r. -\end{equation} - -We have proven lemma \ref{helpful_lemma} from Eq (\ref{submatrix_structure}) and (\ref{row_structure}). - -\end{proof} - - -With this lemma, we can now prove the following theorem. - - -\begin{theorem} -After applying QR decomposition to a banded plus semiseparable matrix $A$(which is expressed in Eq.(\ref{express_A})) with lower rank $r$, upper rank $p$, lower bandwidth $l$, and upper bandwidth $m$, the factor matrix $F$ obtained is also a banded plus semiseparable matrix. Its lower semiseparable part has rank $r$ and upper semiseparable part has rank $r+p$; its lower bandwidth is $l$ and upper bandwidth is $l+m$. -\end{theorem} - -\begin{proof} - -The QR decomposition can be done by performing a series of Householder Transformations(HT) to eliminate elements below the diagonals of $A$ in the first column, the second column, ..., up to the $n-1$th column in order. - -First, we perform an HT to eliminate the first column, that is, computing $(I-\tau_1 \mathbf{y}_1 \mathbf{y}^T_1)A$, where $\mathbf{y}_1$ is a regularized reflection vector s.t. the first nonzero element in $\mathbf{y}_1$ is $1$, and $\tau_1$ is a coefficient found during the process of the first HT. It is easy to see that $\mathbf{y}_1$ can be represented as $\mathbf{y}_1=\mathbf{e}_1+U^{(2)}\bar{\mathbf{k}}_2+\mathbf{b}_2$ where $\bar{\mathbf{k}}_2\in \mathbb{R}^r$, $\mathbf{b}_2=(0,b^{(2)}_2,b^{(2)}_3,...,b^{(2)}_{\min(l+1,n)},0,...,0)^T \in R^n$. Therefore, the first column of $F$ below the diagonal can be determined as -\begin{equation} -F[2:n, 1]=(U\bar{\mathbf{k}}_2+\mathbf{b}_2)[2:n]. -\end{equation} - -Define $A_i=A[i:n,i:n]\in \mathbb{R}^{n+1-i,n+1-i}$, $U_i=U[i:n,:]\in \mathbb{R}^{(n+1-i)\times r}$, $V_i=V[i:n,:]\in \mathbb{R}^{(n+1-i)\times r}$, $S_i=S[i:n,:]\in \mathbb{R}^{(n+1-i)\times p}$, and $W_i=W[i:n,:]\in \mathbb{R}^{(n+1-i)\times p}$; $\bar{\mathbf{u}}_i:=(u_i^{(1)},...,u_i^{(r)})^T\in \mathbb{R}^r$ and $\bar{\mathbf{w}}_i=(w_i^{(1)},...,w_i^{(p)})^T\in \mathbb{R}^p$ for $i=1,...,n$. Let $A^{(i)}$ be matrix $A$ after the $i$th HT, we have $A^{(1)}:=(I-\tau_1 \mathbf{y}_1 \mathbf{y}^T_1)A$ now. Also let $\bar{S}:=U^TA\in \mathbb{R}^{r\times n}$, apply Eq.(\ref{submatrix_structure}) and (\ref{row_structure}) in lemma (\ref{helpful_lemma}) by setting $A=A_1$, $U=U_1$, $S=S_1$, $Q=\mathbf{0}$, $K=\mathbf{0}$, $E=\mathbf{0}$, $X=\mathbf{0}$, $Y=\mathbf{0}$, $Z=\mathbf{0}$, and compute $\mathbf{c}_1^{(1)}:=Q^TU_1^T\mathbf{y}_1=\mathbf{0}$, $\mathbf{c}_2^{(1)}:=K^TU_1^T\mathbf{y}_1=\mathbf{0}$, $\mathbf{c}_3^{(1)}:=U_1^T\mathbf{y}_1$, $\mathbf{c}_4^{(1)}:=X^T\mathbf{y}_1 =\mathbf{0}$, $\mathbf{c}_5^{(1)}:=Y^T\mathbf{y}_1=\mathbf{0}$ and -$\mathbf{c}_6^{(1)}:=Z^T\mathbf{y}_1=\mathbf{0}$, also write $\mathbf{e}_1^TA_1=\mathbf{d}_1^T+\bar{\mathbf{w}}_1^TS_1^T$ where $\mathbf{d}_1=B[1,:]^T\in \mathbb{R}^n$, and $\mathbf{b}_2^TA_1=\bar{\mathbf{d}}_1^T+\mathbf{f}_1^TS_1^T$ where $\bar{\mathbf{d}}_1=(\bar{d}_1,...,\bar{d}_{\min(l+m+1,n)},0,...,0)^T\in \mathbb{R}^n$, $\mathbf{f}_1=W_1^T\mathbf{b}_2\in \mathbb{R}^{p}$, we have: - -i. - -\begin{equation} -A ^{(1)}[2:n,2:n]=A_2+U_2Q_2S_2^T+U_2K_2U_2^TA_2+U_2E_2+X_2S_2^T+Y_2U_2^TA_2+Z_2 -\end{equation} -with -\begin{equation} -\begin{aligned} - Q_2:=-\tau_1\bar{\mathbf{k}}_2\bar{\mathbf{w}}_1^T-\tau_1 \bar{\mathbf{k}}_2\mathbf{f}_1^T - \end{aligned} -\end{equation} - -\begin{equation} - K_2:=-\tau_1\bar{\mathbf{k}}_2\bar{\mathbf{k}}_2^T; -\end{equation} - -\begin{equation} - E_2:=[E_s^{(2)},\mathbf{0}] \in \mathbb{R}^{r\times (n-1)} -\end{equation} - -with - -\begin{equation} -\begin{aligned} - E_s^{(2)}:=&(-\tau_1\bar{\mathbf{k}}_2\mathbf{d}_1^T-\tau_1\bar{\mathbf{k}}_2\bar{\mathbf{d}}_1^T)[:,2:\min(l+m+1,n)]; -\end{aligned} -\end{equation} - -\begin{equation} - X_2:=\begin{bmatrix} -X_s^{(2)} \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{(n-1)\times p} -\end{equation} - -with - -\begin{equation} -\begin{aligned} - X_s^{(2)}:=&(-\tau_1 \mathbf{b}_2\bar{\mathbf{w}}_1^T-\tau_1 \mathbf{b}_2\mathbf{f}_1^T)[2:\min(l+1,n),:]; -\end{aligned} -\end{equation} - -\begin{equation} - Y_2:= \begin{bmatrix} -Y_s^{(2)} \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{(n-1)\times r} -\end{equation} - -with - -\begin{equation} - Y_s^{(2)}:=(-\tau_1 \mathbf{b}_2\bar{\mathbf{k}}_2^T)[2:\min(l+1,n),:]; -\end{equation} - -\begin{equation} - Z_2:= \begin{bmatrix} -Z_s^{(2)} & \mathbf{0} \\ \mathbf{0} & \mathbf{0} \end{bmatrix}\in \mathbb{R}^{(n-1) \times (n-1)} -\end{equation} - -with - -\begin{equation} -\begin{aligned} - Z_s^{(2)}:=&(-\tau_1\mathbf{b}_2\mathbf{d}_1^T-\tau_1\mathbf{b}_2\bar{\mathbf{d}}_1^T)[2:\min(l+1,n),2:\min(l+m+1,n)]. -\end{aligned} -\end{equation} - -ii. - -\begin{equation} -F[1,2:n]=A^{(1)}[1,2:n]=\tilde{\mathbf{d}}_2^T+(\boldsymbol{\alpha}_2^T S^T)[2:n]+ (\boldsymbol{\beta}_2^T \bar{S})[2:n] -\end{equation} - -where - -\begin{equation} - \tilde{\mathbf{d}}_2:= \begin{bmatrix} -\tilde{\mathbf{d}}_s^{(2)} \\ \mathbf{0} \end{bmatrix} \in \mathbb{R}^{n-1} -\end{equation} - -with - -\begin{equation} -\begin{aligned} - \tilde{\mathbf{d}}_s^{(2)}:=(\mathbf{d}_1^T-\tau\mathbf{d}_1^T-\tau\bar{\mathbf{d}}^T+\tau\bar{\mathbf{k}}^T\bar{\mathbf{u}}_1\mathbf{d}_1^T)^T[2:\min(l+m+1,n)]; -\end{aligned} -\end{equation} - -\begin{equation} -\begin{aligned} - \boldsymbol{\alpha}_2:=(\bar{\mathbf{w}}_1^T-\tau\bar{\mathbf{w}}_1^T-\tau\mathbf{f}^T+\tau\bar{\mathbf{k}}^T\bar{\mathbf{u}}_1\bar{\mathbf{w}}_1^T)^T\in \mathbb{R}^p; -\end{aligned} -\end{equation} - -and - -\begin{equation} - \boldsymbol{\beta}_2:=(-\tau \bar{\mathbf{k}}^T)^T\in \mathbb{R}^r. -\end{equation} - - -\textbf{Next is the induction part}: - -Suppose that after the $j$th HT($1\leq j Date: Tue, 11 Nov 2025 17:21:44 +0000 Subject: [PATCH 04/19] Update Project.toml --- Project.toml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index 14ff321..ba0d5eb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ -name = "BandedPlusSemiseparableMatrices" -uuid = "baa84449-aec6-4dc3-ad95-190cb16cdd31" +name = "SemiseparableMatrices" +uuid = "f8ebbe35-cbfb-4060-bf7f-b10e4670cf57" authors = ["Sheehan Olver "] -version = "0.4.0" +version = "0.5.0" [deps] ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" @@ -10,8 +10,6 @@ BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MatrixFactorizations = "a3b82374-2e81-5b9e-98ce-41277c0e4c87" -Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" -SemiseparableMatrices = "f8ebbe35-cbfb-4060-bf7f-b10e4670cf57" [compat] ArrayLayouts = "1.0" From 39d87ab512999d90a25b61e28c390463cf57b87f Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Tue, 11 Nov 2025 18:01:06 +0000 Subject: [PATCH 05/19] fix code --- src/BandedPlusSemiseparableMatrices.jl | 28 +++++++------- src/SemiseparableMatrices.jl | 5 ++- test/runtests.jl | 4 +- test/test_QR.jl | 52 +++++++++++++------------- test/test_bandedplussemiseparable.jl | 8 ++++ 5 files changed, 54 insertions(+), 43 deletions(-) create mode 100644 test/test_bandedplussemiseparable.jl diff --git a/src/BandedPlusSemiseparableMatrices.jl b/src/BandedPlusSemiseparableMatrices.jl index 3545cd6..147feb6 100644 --- a/src/BandedPlusSemiseparableMatrices.jl +++ b/src/BandedPlusSemiseparableMatrices.jl @@ -1,12 +1,3 @@ -module BandedPlusSemiseparableMatrices - -using LinearAlgebra, BandedMatrices -import BandedMatrices: isbanded, bandwidths -import Base: size, axes, getindex -using SemiseparableMatrices: LowRankMatrix, LayoutMatrix - -export BandedPlusSemiseparableMatrix, BandedPlusSemiseparableQRPerturbedFactors, fast_qr! - struct BandedPlusSemiseparableMatrix{T} <: LayoutMatrix{T} bands::BandedMatrix{T} lowerfill::LowRankMatrix{T} @@ -64,6 +55,9 @@ struct BandedPlusSemiseparableQRPerturbedFactors{T} <: LayoutMatrix{T} j::Base.RefValue{Int} # how many columns have been upper-triangulised end +BandedPlusSemiseparableMatrix(A::BandedPlusSemiseparableQRPerturbedFactors) = BandedPlusSemiseparableMatrix(A.B, (A.U,A.V), (A.W,A.S)) +BandedPlusSemiseparableQRPerturbedFactors(A::BandedPlusSemiseparableMatrix) = BandedPlusSemiseparableQRPerturbedFactors(copy(A.lowerfill.args[1]), copy(A.lowerfill.args[2]'), copy(A.upperfill.args[1]), copy(A.upperfill.args[2]'), copy(A.bands)) + size(A::BandedPlusSemiseparableQRPerturbedFactors) = (A.n,A.n) axes(A::BandedPlusSemiseparableQRPerturbedFactors) = (1:A.n, 1:A.n) @@ -131,11 +125,12 @@ function getindex(A::BandedPlusSemiseparableQRPerturbedFactors, k::Integer, i::I end end -function fast_qr!(A) + +function qr!(A::BandedPlusSemiseparableQRPerturbedFactors) n = A.n τ = zeros(n) if A.j[] != 0 - throw(Exception("Matrix has already been partially upper-triangularized")) + throw(ErrorException("Matrix has already been partially upper-triangularized")) end UᵀU = UᵀU_lookup_table(A) @@ -149,7 +144,12 @@ function fast_qr!(A) A.B[n,n] = A[n,n] A.j[] += 1 - τ + QR(A, τ) +end + +function qr(A::BandedPlusSemiseparableMatrix) + F = qr!(BandedPlusSemiseparableQRPerturbedFactors(A)) + QR(BandedPlusSemiseparableMatrix(F.factors), F.τ) end function onestep_qr!(A, τ, UᵀU, ūw̄_sum, d_extra) @@ -344,7 +344,7 @@ function ūw̄_sum_lookup_table(A) end function d_extra_lookup_table(A) - d_extra = Vector{Any}() + d_extra = Vector{Matrix{eltype(A)}}() for i in 1:A.n d_extra_current = zeros(A.r, min(A.m, A.n-i)) for t in max(1,i+1-A.m) : i-1 @@ -371,6 +371,4 @@ function fast_UᵀA(U, V, W, S, B, j) UᵀW += U[i,:] * (W[i,:])' end UᵀA -end - end \ No newline at end of file diff --git a/src/SemiseparableMatrices.jl b/src/SemiseparableMatrices.jl index 4109546..3414567 100644 --- a/src/SemiseparableMatrices.jl +++ b/src/SemiseparableMatrices.jl @@ -2,7 +2,7 @@ module SemiseparableMatrices using LinearAlgebra: BlasFloat using ArrayLayouts, BandedMatrices, LazyArrays, LinearAlgebra, MatrixFactorizations, Base -import Base: size, getindex, setindex!, convert, copyto! +import Base: size, getindex, setindex!, convert, copyto!, copy import MatrixFactorizations: QR, QRPackedQ, getQ, getR, QRPackedQLayout, AdjQRPackedQLayout import LinearAlgebra: qr, qr!, lmul!, ldiv!, rmul!, triu!, factorize, rank import BandedMatrices: _banded_qr!, bandeddata, resize @@ -11,7 +11,7 @@ import ArrayLayouts: MemoryLayout, sublayout, sub_materialize, MatLdivVec, mater triangulardata, zero!, _copyto!, colsupport, rowsupport, _qr, _qr!, _factorize -export SemiseparableMatrix, AlmostBandedMatrix, LowRankMatrix, ApplyMatrix, ApplyArray, almostbandwidths, almostbandedrank +export SemiseparableMatrix, AlmostBandedMatrix, LowRankMatrix, ApplyMatrix, ApplyArray, almostbandwidths, almostbandedrank, BandedPlusSemiseparableMatrix LazyArraysBandedMatricesExt = Base.get_extension(LazyArrays, :LazyArraysBandedMatricesExt) BandedLayouts = LazyArraysBandedMatricesExt.BandedLayouts @@ -30,5 +30,6 @@ separablerank(A) = size(arguments(ApplyLayout{typeof(*)}(),A)[1],2) include("SemiseparableMatrix.jl") include("AlmostBandedMatrix.jl") include("invbanded.jl") +include("BandedPlusSemiseparableMatrices.jl") end # module diff --git a/test/runtests.jl b/test/runtests.jl index fc20df3..11278af 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,4 +2,6 @@ using SemiseparableMatrices, BandedMatrices, LinearAlgebra, Test include("test_semiseparable.jl") include("test_almostbanded.jl") -include("test_invbanded.jl") \ No newline at end of file +include("test_invbanded.jl") +include("test_bandedplussemiseparable.jl") +include("test_qr.jl") \ No newline at end of file diff --git a/test/test_QR.jl b/test/test_QR.jl index 6360281..b499b0c 100644 --- a/test/test_QR.jl +++ b/test/test_QR.jl @@ -1,29 +1,31 @@ -using BandedMatrices -using Test, LinearAlgebra -using Random -using BandedPlusSemiseparableMatrices -import .BandedPlusSemiseparableMatrices: BandedPlusSemiseparableMatrix, BandedPlusSemiseparableQRPerturbedFactors, fast_qr!, onestep_qr! +using BandedMatrices, Test, LinearAlgebra, Random, SemiseparableMatrices +using SemiseparableMatrices: BandedPlusSemiseparableMatrix, BandedPlusSemiseparableQRPerturbedFactors, onestep_qr! #Random.seed!(1234) -n = 20 -l = 4 -m = 5 -r = 2 -p = 3 -B = brandn(n,n,l,m) -U = randn(n,r) -V = randn(n,r) -W = randn(n,p) -S = randn(n,p) -A = BandedPlusSemiseparableQRPerturbedFactors(U,V,W,S,B) +@testset "QR" begin + n = 20 + l, m, r, p = 4, 5, 2, 3 + B = brandn(n,n,l,m) -F = qr(Matrix(A)).factors -Acopy = copy(Matrix(A)) -mm, nn = size(Acopy) -τ_true = Vector{eltype(Acopy)}(undef, min(mm,nn)) -LAPACK.geqrf!(Acopy, τ_true) + @testset "BandedPlusSemiseparableQRPerturbedFactors" begin + U,V = randn(n,r), randn(n,r) + W,S = randn(n,p), randn(n,p) + A = BandedPlusSemiseparableQRPerturbedFactors(U,V,W,S,B) + fact_true = LinearAlgebra.qrfactUnblocked!(Matrix(A)) + fact = qr!(A) + @test A ≈ fact_true.factors ≈ fact.factors + @test fact_true.τ ≈ fact.τ + end -τ = fast_qr!(A) - -@test Matrix(A) ≈ F -@test τ ≈ τ_true + @test "BandedPlusSemiseparableMatrix" begin + U,V = randn(n,r), randn(n,r) + W,S = randn(n,p), randn(n,p) + A = BandedPlusSemiseparableMatrix(B,(U,V),(W,S)) + A_true = Matrix(A) + fact_true = LinearAlgebra.qrfactUnblocked!(Matrix(A)) + fact = qr(A) + @test A ≈ A_true + @test fact_true.factors ≈ fact.factors + @test fact_true.τ ≈ fact.τ + end +end \ No newline at end of file diff --git a/test/test_bandedplussemiseparable.jl b/test/test_bandedplussemiseparable.jl new file mode 100644 index 0000000..9fab664 --- /dev/null +++ b/test/test_bandedplussemiseparable.jl @@ -0,0 +1,8 @@ +using SemiseparableMatrices, Test + +@testset "BandedPlusSemiseparable" begin + B = brandn(n,n,l,m) + U,V = randn(n,r), randn(n,r) + W,S = randn(n,p), randn(n,p) + @test BandedPlusSemiseparableMatrix(B, (U,V), (W,S)) ≈ B + tril(U*V',-1) + triu(W*S',1) +end \ No newline at end of file From c181f27cac82f9e2a46d7bd5db42e75ebdb99bff Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Wed, 12 Nov 2025 10:37:50 +0000 Subject: [PATCH 06/19] tests pass --- src/BandedPlusSemiseparableMatrices.jl | 1 - src/SemiseparableMatrices.jl | 2 +- test/test_QR.jl | 6 +++--- test/test_bandedplussemiseparable.jl | 2 ++ 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/BandedPlusSemiseparableMatrices.jl b/src/BandedPlusSemiseparableMatrices.jl index 147feb6..0ecf6c6 100644 --- a/src/BandedPlusSemiseparableMatrices.jl +++ b/src/BandedPlusSemiseparableMatrices.jl @@ -59,7 +59,6 @@ BandedPlusSemiseparableMatrix(A::BandedPlusSemiseparableQRPerturbedFactors) = Ba BandedPlusSemiseparableQRPerturbedFactors(A::BandedPlusSemiseparableMatrix) = BandedPlusSemiseparableQRPerturbedFactors(copy(A.lowerfill.args[1]), copy(A.lowerfill.args[2]'), copy(A.upperfill.args[1]), copy(A.upperfill.args[2]'), copy(A.bands)) size(A::BandedPlusSemiseparableQRPerturbedFactors) = (A.n,A.n) -axes(A::BandedPlusSemiseparableQRPerturbedFactors) = (1:A.n, 1:A.n) function BandedPlusSemiseparableQRPerturbedFactors(U,V,W,S,B) n = size(U,1) diff --git a/src/SemiseparableMatrices.jl b/src/SemiseparableMatrices.jl index 3414567..7d43af1 100644 --- a/src/SemiseparableMatrices.jl +++ b/src/SemiseparableMatrices.jl @@ -2,7 +2,7 @@ module SemiseparableMatrices using LinearAlgebra: BlasFloat using ArrayLayouts, BandedMatrices, LazyArrays, LinearAlgebra, MatrixFactorizations, Base -import Base: size, getindex, setindex!, convert, copyto!, copy +import Base: size, getindex, setindex!, convert, copyto!, copy, axes import MatrixFactorizations: QR, QRPackedQ, getQ, getR, QRPackedQLayout, AdjQRPackedQLayout import LinearAlgebra: qr, qr!, lmul!, ldiv!, rmul!, triu!, factorize, rank import BandedMatrices: _banded_qr!, bandeddata, resize diff --git a/test/test_QR.jl b/test/test_QR.jl index b499b0c..fbbb1e8 100644 --- a/test/test_QR.jl +++ b/test/test_QR.jl @@ -1,5 +1,5 @@ -using BandedMatrices, Test, LinearAlgebra, Random, SemiseparableMatrices -using SemiseparableMatrices: BandedPlusSemiseparableMatrix, BandedPlusSemiseparableQRPerturbedFactors, onestep_qr! +using BandedMatrices, LinearAlgebra, Random, SemiseparableMatrices, Test +using SemiseparableMatrices: BandedPlusSemiseparableQRPerturbedFactors #Random.seed!(1234) @testset "QR" begin @@ -17,7 +17,7 @@ using SemiseparableMatrices: BandedPlusSemiseparableMatrix, BandedPlusSemisepara @test fact_true.τ ≈ fact.τ end - @test "BandedPlusSemiseparableMatrix" begin + @testset "BandedPlusSemiseparableMatrix" begin U,V = randn(n,r), randn(n,r) W,S = randn(n,p), randn(n,p) A = BandedPlusSemiseparableMatrix(B,(U,V),(W,S)) diff --git a/test/test_bandedplussemiseparable.jl b/test/test_bandedplussemiseparable.jl index 9fab664..20d2d2d 100644 --- a/test/test_bandedplussemiseparable.jl +++ b/test/test_bandedplussemiseparable.jl @@ -1,6 +1,8 @@ using SemiseparableMatrices, Test @testset "BandedPlusSemiseparable" begin + n = 20 + l, m, r, p = 4, 5, 2, 3 B = brandn(n,n,l,m) U,V = randn(n,r), randn(n,r) W,S = randn(n,p), randn(n,p) From 7733e8e86e1eea5beb08285aecb5cf46d5f60a89 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Wed, 12 Nov 2025 10:43:41 +0000 Subject: [PATCH 07/19] enable precomputation --- src/BandedPlusSemiseparableMatrices.jl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/BandedPlusSemiseparableMatrices.jl b/src/BandedPlusSemiseparableMatrices.jl index 0ecf6c6..e6aed0e 100644 --- a/src/BandedPlusSemiseparableMatrices.jl +++ b/src/BandedPlusSemiseparableMatrices.jl @@ -125,19 +125,18 @@ function getindex(A::BandedPlusSemiseparableQRPerturbedFactors, k::Integer, i::I end -function qr!(A::BandedPlusSemiseparableQRPerturbedFactors) - n = A.n - τ = zeros(n) +function qr!(A::BandedPlusSemiseparableQRPerturbedFactors{T}) where T if A.j[] != 0 throw(ErrorException("Matrix has already been partially upper-triangularized")) end - UᵀU = UᵀU_lookup_table(A) - ūw̄_sum = ūw̄_sum_lookup_table(A) - d_extra = d_extra_lookup_table(A) + bandedplussemi_qr!(A, zeros(T,A.n), UᵀU_lookup_table(A), ūw̄_sum_lookup_table(A), d_extra_lookup_table(A)) +end +function bandedplussemi_qr!(A, τ, tables...) + n = A.n for i in 1 : n-1 - onestep_qr!(A, τ, UᵀU, ūw̄_sum, d_extra) + onestep_qr!(A, τ, tables...) end A.B[n,n] = A[n,n] From cd70feb48ed4a1c9cb6ae75fc68f14e8f771de67 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Wed, 12 Nov 2025 11:11:15 +0000 Subject: [PATCH 08/19] Update ci.yml --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cdedb11..2c60ba0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,8 @@ jobs: fail-fast: false matrix: version: - - '1.10' + - 'lts' + - '1' os: - ubuntu-latest - macOS-latest From e6ee517b6962f91a823c30371704ea808ce40432 Mon Sep 17 00:00:00 2001 From: TaoChenImperial Date: Mon, 17 Nov 2025 15:20:43 +0000 Subject: [PATCH 09/19] decrease memory allocations and modify data structure --- src/BandedPlusSemiseparableMatrices.jl | 344 +++++++++++++------------ test/test_QR.jl | 2 +- 2 files changed, 180 insertions(+), 166 deletions(-) diff --git a/src/BandedPlusSemiseparableMatrices.jl b/src/BandedPlusSemiseparableMatrices.jl index e6aed0e..6fcdc20 100644 --- a/src/BandedPlusSemiseparableMatrices.jl +++ b/src/BandedPlusSemiseparableMatrices.jl @@ -1,25 +1,33 @@ struct BandedPlusSemiseparableMatrix{T} <: LayoutMatrix{T} - bands::BandedMatrix{T} - lowerfill::LowRankMatrix{T} - upperfill::LowRankMatrix{T} + # representing B + tril(UV', -1) + triu(WS', 1) + B::BandedMatrix{T} + U::Matrix{T} + V::Matrix{T} + W::Matrix{T} + S::Matrix{T} end -BandedPlusSemiseparableMatrix(B, (U,V), (W,S)) = BandedPlusSemiseparableMatrix(B, LowRankMatrix(U, V'), LowRankMatrix(W, S')) +function BandedPlusSemiseparableMatrix(B, (U,V), (W,S)) + if size(U,1) == size(V,1) == size(W,1) == size(S,1) == size(B,1) == size(B,2) && size(U,2) == size(V,2) && size(W,2) == size(S,2) + BandedPlusSemiseparableMatrix(B, U, V, W, S) + else + throw(ErrorException("Dimensions not match!")) + end +end -size(A::BandedPlusSemiseparableMatrix) = size(A.bands) +size(A::BandedPlusSemiseparableMatrix) = size(A.B) copy(A::BandedPlusSemiseparableMatrix) = A # not mutable function getindex(A::BandedPlusSemiseparableMatrix, k::Integer, j::Integer) if j > k - A.upperfill[k,j] + A.bands[k,j] + view(A.W, k, :)' * view(A.S, j, :) + A.B[k,j] elseif k > j - A.lowerfill[k,j] + A.bands[k,j] + view(A.U, k, :)' * view(A.V, j, :) + A.B[k,j] else - A.bands[k,j] + A.B[k,j] end end - """ Represents factors matrix for QR for banded+semiseparable. we have @@ -29,21 +37,16 @@ Represents factors matrix for QR for banded+semiseparable. we have A[1:j,:] == F[1:j,:]; A[:,1:j] == F[:,1:j]; A[k, k] == F[k, k] for k < j; - A[j+1:end,j+1:end] == A₀[j+1:end,j+1:end] + U[j+1:end,:]*Q*S[j+1:end]' + U[j+1:end,:]*K*U[j+1:end]'*A₀[j+1:end,j+1:end] - + U[j+1:end,:]*[Eₛ 0] + [Xₛ;0]*S[j+1:end]' + [Yₛ;0]*U[j+1:end]'*A₀[j+1:end,j+1:end] + [Zₛ 0;0 0] + A[j+1:end,j+1:end] == A₀[j+1:end,j+1:end] + U[j+1:end,:]*Q*S[j+1:end,1:p]' + U[j+1:end,:]*K*U[j+1:end]'*A₀[j+1:end,j+1:end] + + U[j+1:end,:]*[Eₛ 0] + [Xₛ;0]*S[j+1:end,1:p]' + [Yₛ;0]*U[j+1:end]'*A₀[j+1:end,j+1:end] + [Zₛ 0;0 0] """ struct BandedPlusSemiseparableQRPerturbedFactors{T} <: LayoutMatrix{T} - n::Int # matrix size - r::Int # lower rank - p::Int # upper rank - l::Int # lower bandwidth - m::Int # upper bandwidth + B::BandedMatrix{T} # lower bandwidth l and upper bandwidth l + m U::Matrix{T} # n × r V::Matrix{T} # n × r W::Matrix{T} # n × (p+r) S::Matrix{T} # n × (p+r) - B::BandedMatrix{T} # lower bandwidth l and upper bandwidth l + m Q::Matrix{T} # r × p K::Matrix{T} # r × r @@ -55,93 +58,95 @@ struct BandedPlusSemiseparableQRPerturbedFactors{T} <: LayoutMatrix{T} j::Base.RefValue{Int} # how many columns have been upper-triangulised end -BandedPlusSemiseparableMatrix(A::BandedPlusSemiseparableQRPerturbedFactors) = BandedPlusSemiseparableMatrix(A.B, (A.U,A.V), (A.W,A.S)) -BandedPlusSemiseparableQRPerturbedFactors(A::BandedPlusSemiseparableMatrix) = BandedPlusSemiseparableQRPerturbedFactors(copy(A.lowerfill.args[1]), copy(A.lowerfill.args[2]'), copy(A.upperfill.args[1]), copy(A.upperfill.args[2]'), copy(A.bands)) +BandedPlusSemiseparableMatrix(A::BandedPlusSemiseparableQRPerturbedFactors) = BandedPlusSemiseparableMatrix(A.B, (A.U, A.V), (A.W, A.S)) +BandedPlusSemiseparableQRPerturbedFactors(A::BandedPlusSemiseparableMatrix) = BandedPlusSemiseparableQRPerturbedFactors(copy(A.B), (copy(A.U), copy(A.V)), (copy(A.W), copy(A.S))) -size(A::BandedPlusSemiseparableQRPerturbedFactors) = (A.n,A.n) +size(A::BandedPlusSemiseparableQRPerturbedFactors) = size(A.B) -function BandedPlusSemiseparableQRPerturbedFactors(U,V,W,S,B) - n = size(U,1) - r = size(U,2) - p = size(W,2) - l, m = bandwidths(B) - A = tril(U*V',-1) + B + triu(W*S',1) - AᵀU = (fast_UᵀA(U, V, W, S, B, 1))' - BandedPlusSemiseparableQRPerturbedFactors(n,r,p,l,m,U,V,[W zeros(n,r)],[S AᵀU],BandedMatrix(B,(l,l+m)), - zeros(r,p),zeros(r,r),zeros(r,min(l+m,n)),zeros(min(l,n),p),zeros(min(l,n),r),zeros(min(l,n),min(l+m,n)),Ref(0)) +function BandedPlusSemiseparableQRPerturbedFactors(B, (U,V), (W,S)) + if size(U,1) == size(V,1) == size(W,1) == size(S,1) == size(B,1) == size(B,2) && size(U,2) == size(V,2) && size(W,2) == size(S,2) + n, r = size(U) + p = size(W,2) + l, m = bandwidths(B) + A = tril(U*V',-1) + B + triu(W*S',1) + AᵀU = (fast_UᵀA(U, V, W, S, B, 1))' + BandedPlusSemiseparableQRPerturbedFactors(BandedMatrix(B,(l,l+m)),U,V,[W zeros(n,r)],[S AᵀU], + zeros(r,p),zeros(r,r),zeros(r,min(l+m,n)),zeros(min(l,n),p),zeros(min(l,n),r),zeros(min(l,n),min(l+m,n)),Ref(0)) + else + throw(ErrorException("Dimensions not match!")) + end end function getindex(A::BandedPlusSemiseparableQRPerturbedFactors, k::Integer, i::Integer) j = A.j[] + p = size(A.W, 2) - size(A.U, 2) # W already been padded to have size n × (p+r) if k > j && i > j AᵀU = (fast_UᵀA(A.U, A.V, A.W, A.S, A.B, j+1))' - UQ = A.U[k,:]' * A.Q - UK = A.U[k,:]' * A.K + UQ = view(A.U, k, :)' * A.Q + UK = view(A.U, k, :)' * A.K - l = A.l[] - m = A.m[] + l, m = bandwidths(A.B) + m = m - l # B already been padded to have upper bandwidth l+m if k > i if k <= j + l - A.U[k,:]' * A.V[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + A.Xₛ[k-j,:]' * A.S[i,1:A.p] + A.Yₛ[k-j,:]' * AᵀU[i-j,:] + A.Zₛ[k-j,i-j] + view(A.U, k, :)' * view(A.V, i, :) + A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) + view(A.U, k, :)' * view(A.Eₛ, :, i-j) + view(A.Xₛ, k-j, :)' * view(A.S, i, 1:p) + view(A.Yₛ, k-j, :)' * view(AᵀU, i-j, :) + A.Zₛ[k-j,i-j] else if i <= j + l + m - A.U[k,:]' * A.V[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + view(A.U, k, :)' * view(A.V, i, :) + A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) + view(A.U, k, :)' * view(A.Eₛ, :, i-j) else - A.U[k,:]' * A.V[i,:] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.B[k,i] + view(A.U, k, :)' * view(A.V, i, :) + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) + A.B[k,i] end end elseif k < i if k <= j + l if i <= j + l + m - A.W[k,:]' * A.S[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + A.Xₛ[k-j,:]' * A.S[i,1:A.p] + A.Yₛ[k-j,:]' * AᵀU[i-j,:] + A.Zₛ[k-j,i-j] + view(A.W, k, :)' * view(A.S, i, :) + A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) + view(A.U, k, :)' * view(A.Eₛ, :, i-j) + view(A.Xₛ, k-j, :)' * view(A.S, i, 1:p) + view(A.Yₛ, k-j, :)' * view(AᵀU, i-j, :) + A.Zₛ[k-j,i-j] else - A.W[k,:]' * A.S[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.Xₛ[k-j,:]' * A.S[i,1:A.p] + A.Yₛ[k-j,:]' * AᵀU[i-j,:] + view(A.W, k, :)' * view(A.S, i, :) + A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) + view(A.Xₛ, k-j, :)' * view(A.S, i, 1:p) + view(A.Yₛ, k-j, :)' * view(AᵀU, i-j, :) end else if i <= j + l + m - A.W[k,:]' * A.S[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + view(A.W, k, :)' * view(A.S, i, :) + A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) + view(A.U, k, :)' * view(A.Eₛ, :, i-j) else - A.W[k,:]' * A.S[i,:] + A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + view(A.W, k, :)' * view(A.S, i, :) + A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) end end else if k <= j + l - A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + A.Xₛ[k-j,:]' * A.S[i,1:A.p] + A.Yₛ[k-j,:]' * AᵀU[i-j,:] + A.Zₛ[k-j,i-j] + A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) + view(A.U, k, :)' * view(A.Eₛ, :, i-j) + view(A.Xₛ, k-j, :)' * view(A.S, i, 1:p) + view(A.Yₛ, k-j, :)' * view(AᵀU, i-j, :) + A.Zₛ[k-j,i-j] elseif i <= j + l + m - A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.U[k,:]' * A.Eₛ[:,i-j] + A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) + view(A.U, k, :)' * view(A.Eₛ, :, i-j) else - A.B[k,i] + UQ * A.S[i,1:A.p] + UK * AᵀU[i-j,:] + A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) end end else if k > i - A.U[k,:]' * A.V[i,:] + A.B[k,i] + view(A.U, k, :)' * view(A.V, i, :) + A.B[k,i] elseif k < i - A.W[k,:]' * A.S[i,:] + A.B[k,i] + view(A.W, k, :)' * view(A.S, i, :) + A.B[k,i] else A.B[k,i] end end end - function qr!(A::BandedPlusSemiseparableQRPerturbedFactors{T}) where T if A.j[] != 0 throw(ErrorException("Matrix has already been partially upper-triangularized")) end - bandedplussemi_qr!(A, zeros(T,A.n), UᵀU_lookup_table(A), ūw̄_sum_lookup_table(A), d_extra_lookup_table(A)) + bandedplussemi_qr!(A, zeros(T,size(A.B, 1)), UᵀU_lookup_table(A), ūw̄_sum_lookup_table(A), d_extra_lookup_table(A)) end function bandedplussemi_qr!(A, τ, tables...) - n = A.n + n = size(A.B, 1) for i in 1 : n-1 onestep_qr!(A, τ, tables...) end A.B[n,n] = A[n,n] A.j[] += 1 - QR(A, τ) end @@ -151,21 +156,21 @@ function qr(A::BandedPlusSemiseparableMatrix) end function onestep_qr!(A, τ, UᵀU, ūw̄_sum, d_extra) - k̄, b, p_new, τ_current= compute_Householder_vector(A, UᵀU) # I- τyy' where y = eⱼ₊₁ + U⁽ʲ⁺²⁾k̄ + b and p_new will be the diagonal element on the current column + k̄, b, pivot_new, τ_current = compute_Householder_vector(A, UᵀU) # I- τyy' where y = eⱼ₊₁ + U⁽ʲ⁺²⁾k̄ + b and p_new will be the diagonal element on the current column τ[A.j[]+1] = τ_current w̄₁, ū₁, d₁, f, d̄ = compute_d₁_and_d̄(A, b) c₁, c₂, c₃, c₄, c₅, c₆ = compute_variables_c(A, k̄, b, UᵀU) - Q_prev = A.Q[:,:] - K_prev = A.K[:,:] - Eₛ_prev = A.Eₛ[:,:] - Xₛ_prev = A.Xₛ[:,:] - Yₛ_prev = A.Yₛ[:,:] - Zₛ_prev = A.Zₛ[:,:] + Q_prev = copy(A.Q) + K_prev = copy(A.K) + Eₛ_prev = copy(A.Eₛ) + Xₛ_prev = copy(A.Xₛ) + Yₛ_prev = copy(A.Yₛ) + Zₛ_prev = copy(A.Zₛ) - update_next_submatrix!(A, k̄, b, τ_current, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆) + update_next_submatrix!(A, k̄, b, τ_current, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev) update_upper_triangular_part!(A, k̄, b, τ_current, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev, ūw̄_sum, d_extra) - update_lower_triangular_part!(A, p_new, k̄, b) + update_lower_triangular_part!(A, pivot_new, k̄, b) A.j[] += 1 @@ -178,175 +183,185 @@ function compute_Householder_vector(A, UᵀU) # first express A[j+1:end,j+1] as p*eⱼ₊₁ + U⁽ʲ⁺²⁾k̄ + b j = A.j[] - UᵀA_1 = (A.U[j+1:min(A.l+j+1,A.n),:])'*A.B[j+1:min(A.l+j+1,A.n),j+1] + UᵀU[j+2,:,:]*A.V[j+1,:] #the j+1th column of UᵀA - - k̄ = A.V[j+1,:] + A.Q * A.S[j+1,1:A.p] + A.K * UᵀA_1 - if A.l > 0 || A.m > 0 - k̄ += A.Eₛ[:,1] + n = size(A.B, 1) + p = size(A.W, 2) - size(A.U, 2) # W already been padded to have size n × (p+r) + l, m = bandwidths(A.B) + m = m - l # B already been padded to have upper bandwidth l+m + UᵀA_1 = (view(A.U, j+1:min(l+j+1, n),:))' * view(A.B, j+1:min(l+j+1, n) , j+1) + view(UᵀU, j+2 , : , :) * view(A.V, j+1, :) #the j+1th column of UᵀA + + k̄ = view(A.V, j+1, :) + A.Q * view(A.S, j+1, 1:p) + A.K * UᵀA_1 + if l > 0 || m > 0 + k̄ .+= view(A.Eₛ, :, 1) end - b = A.B[j+2:min(A.l+j+1,A.n), j+1] - if A.l > 0 || A.m > 0 - b[1:min(A.l-1,length(b))] += (A.Xₛ * A.S[j+1,1:A.p] + A.Yₛ * UᵀA_1 + A.Zₛ[:,1])[2:min(A.l,length(b)+1)] + b = A.B[j+2:min(l+j+1, n), j+1] + if l > 0 || m > 0 + b[1:min(l-1,length(b))] .+= view((A.Xₛ * view(A.S, j+1, 1:p) + A.Yₛ * UᵀA_1 + view(A.Zₛ, :, 1)), 2:min(l,length(b)+1)) end - p = A.B[j+1,j+1] + A.U[j+1,:]'A.Q * A.S[j+1,1:A.p] + A.U[j+1,:]'A.K*UᵀA_1 - if A.l > 0 - p += A.U[j+1,:]'A.Eₛ[:,1] + A.Xₛ[1,:]'A.S[j+1,1:A.p] + A.Yₛ[1,:]'UᵀA_1 + A.Zₛ[1,1] - elseif A.m > 0 - p += A.U[j+1,:]'A.Eₛ[:,1] + pivot = A.B[j+1,j+1] + (view(A.U, j+1, :))' * A.Q * view(A.S, j+1, 1:p) + view(A.U, j+1, :)' * A.K * UᵀA_1 + if l > 0 + pivot += (view(A.U, j+1 , :))' * view(A.Eₛ, :, 1) + (view(A.Xₛ, 1 , :))' * view(A.S, j+1 , 1:p) + (view(A.Yₛ, 1, :))' * UᵀA_1 + A.Zₛ[1,1] + elseif m > 0 + pivot += (view(A.U, j+1, :))' * view(A.Eₛ, :, 1) end # compute the length square of A[j+1:end,j+1] - len_square = p^2 + k̄' * UᵀU[j+2,:,:] * k̄ - if A.l > 0 - len_square += k̄' * A.U[j+2:j+1+length(b),:]' * b + b' * A.U[j+2:j+1+length(b),:] * k̄ + b'b + len_square = pivot^2 + k̄' * view(UᵀU, j+2, :, :) * k̄ + if l > 0 + len_square += k̄' * (view(A.U, j+2:j+1+length(b), :))' * b + b' * view(A.U, j+2:j+1+length(b), :) * k̄ + b'b end - p_new = -sign(p) * sqrt(len_square) # the element on the diagonal after HT - k̄ = k̄ / (p - p_new) - b = b / (p - p_new) - τ_current = 2 * (p - p_new)^2 / ((p-p_new)^2 + (len_square - p^2)) # the value τ for the current HT - k̄, b, p_new, τ_current + pivot_new = -sign(pivot) * sqrt(len_square) # the element on the diagonal after HT + k̄ ./= (pivot - pivot_new) + b ./= (pivot - pivot_new) + τ_current = 2 * (pivot - pivot_new)^2 / ((pivot-pivot_new)^2 + (len_square - pivot^2)) # the value τ for the current HT + k̄, b, pivot_new, τ_current end function compute_d₁_and_d̄(A, b) j = A.j[] - w̄₁ = A.W[j+1,1:A.p] - ū₁ = A.U[j+1,:] - d₁ = A.B[j+1,j+1:min(j+1+A.m, A.n)] - d₁[1] = d₁[1] - w̄₁'*(A.S[j+1,1:A.p]) - f = (A.W[j+2:j+1+length(b),1:A.p])' * b + n = size(A.B, 1) + p = size(A.W, 2) - size(A.U, 2) # W already been padded to have size n × (p+r) + l, m = bandwidths(A.B) + m = m - l # B already been padded to have upper bandwidth l+m + w̄₁ = view(A.W, j+1, 1:p) + ū₁ = view(A.U, j+1, :) + d₁ = A.B[j+1,j+1:min(j+1+m, n)] + d₁[1] = d₁[1] - w̄₁'*(view(A.S, j+1, 1:p)) + f = (view(A.W, j+2:j+1+length(b), 1:p))' * b index1 = j+2:j+1+length(b) - index2 = j+1:min(j+1+A.m+A.l,A.n) + index2 = j+1:min(j+1+m+l, n) tril_sub = [ (ii - jj) >= 1 for ii in index1, jj in index2 ] triu_sub = [ (jj - ii) >= 1 for ii in index1, jj in index2 ] - A₀ = A.U[index1,:]*A.V[index2,:]'.*tril_sub + A.B[index1,index2] + A.W[index1,1:A.p]*A.S[index2,1:A.p]'.*triu_sub - d̄ = (b'A₀ - f'*(A.S[index2,1:A.p])')' + A₀ = view(A.U, index1, :) * view(A.V, index2, :)' .* tril_sub + view(A.B, index1, index2) + view(A.W, index1, 1:p) * view(A.S, index2, 1:p)' .* triu_sub + d̄ = (b'A₀ - f'*(view(A.S, index2, 1:p))')' w̄₁, ū₁, d₁, f, d̄ end function compute_variables_c(A, k̄, b, UᵀU) j = A.j[] - UᵀU⁽²⁾ = UᵀU[j+2,:,:] - - c₁ = (A.Q)' * A.U[j+1,:] + (A.Q)' * UᵀU⁽²⁾ * k̄ + (A.Q)' * (A.U[j+2:j+1+length(b),:])' * b - c₂ = (A.K)' * A.U[j+1,:] + (A.K)' * UᵀU⁽²⁾ * k̄ + (A.K)' * (A.U[j+2:j+1+length(b),:])' * b - c₃ = A.U[j+1,:] + UᵀU⁽²⁾ * k̄ + (A.U[j+2:j+1+length(b),:])' * b - - Xₛ_valid = A.Xₛ[2:min(A.l, A.n-j),:] - Yₛ_valid = A.Yₛ[2:min(A.l, A.n-j),:] - Zₛ_valid = A.Zₛ[2:min(A.l, A.n-j),1:min(A.l+A.m, A.n-j)] - c₄ = (Xₛ_valid)' * A.U[j+2:j+1+size(Xₛ_valid,1),:] * k̄ + (Xₛ_valid)' * b[1:size(Xₛ_valid,1)] - c₅ = (Yₛ_valid)' * A.U[j+2:j+1+size(Yₛ_valid,1),:] * k̄ + (Yₛ_valid)' * b[1:size(Yₛ_valid,1)] - c₆ = (Zₛ_valid)' * A.U[j+2:j+1+size(Zₛ_valid,1),:] * k̄ + (Zₛ_valid)' * b[1:size(Zₛ_valid,1)] - if A.l > 0 - c₄ += A.Xₛ[1,:] - c₅ += A.Yₛ[1,:] - c₆ += A.Zₛ[1,1:min(A.l+A.m, A.n-j)] + n = size(A.B, 1) + l, m = bandwidths(A.B) + m = m - l # B already been padded to have upper bandwidth l+m + UᵀU⁽²⁾ = view(UᵀU, j+2, :, :) + + c₁ = (A.Q)' * view(A.U, j+1, :) + (A.Q)' * UᵀU⁽²⁾ * k̄ + (A.Q)' * (view(A.U, j+2:j+1+length(b), :))' * b + c₂ = (A.K)' * view(A.U, j+1, :) + (A.K)' * UᵀU⁽²⁾ * k̄ + (A.K)' * (view(A.U, j+2:j+1+length(b), :))' * b + c₃ = view(A.U, j+1, :) + UᵀU⁽²⁾ * k̄ + (view(A.U, j+2:j+1+length(b), :))' * b + + Xₛ_valid = view(A.Xₛ, 2:min(l, n-j), :) + Yₛ_valid = view(A.Yₛ, 2:min(l, n-j), :) + Zₛ_valid = view(A.Zₛ, 2:min(l, n-j), 1:min(l+m, n-j)) + c₄ = (Xₛ_valid)' * view(A.U, j+2:j+1+size(Xₛ_valid,1), :) * k̄ + (Xₛ_valid)' * view(b, 1:size(Xₛ_valid,1)) + c₅ = (Yₛ_valid)' * view(A.U, j+2:j+1+size(Yₛ_valid,1), :) * k̄ + (Yₛ_valid)' * view(b, 1:size(Yₛ_valid,1)) + c₆ = (Zₛ_valid)' * view(A.U, j+2:j+1+size(Zₛ_valid,1), :) * k̄ + (Zₛ_valid)' * view(b, 1:size(Zₛ_valid,1)) + if l > 0 + c₄ .+= view(A.Xₛ, 1, :) + c₅ .+= view(A.Yₛ, 1, :) + c₆ .+= view(A.Zₛ, 1, 1:min(l+m, n-j)) end c₁, c₂, c₃, c₄, c₅, c₆ end -function update_next_submatrix!(A, k̄, b, τ, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆) - Q_prev = A.Q[:,:] - K_prev = A.K[:,:] - Eₛ_prev = A.Eₛ[:,:] - Xₛ_prev = A.Xₛ[:,:] - Yₛ_prev = A.Yₛ[:,:] - Zₛ_prev = A.Zₛ[:,:] - +function update_next_submatrix!(A, k̄, b, τ, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev) A.Q[:,:] = -τ*k̄*w̄₁' - τ*k̄*f' + Q_prev - τ*k̄*c₁' + K_prev*ū₁*w̄₁'- τ*k̄*c₂'*ū₁*w̄₁' - τ*k̄*c₄' - τ*k̄*c₅'*ū₁*w̄₁' A.K[:,:] = -τ*k̄*k̄' + K_prev - τ*k̄*c₂' - τ*k̄*c₅' - A.Eₛ[:,1:length(d̄)-1] = -τ*k̄*(d̄[2:end])' - A.Eₛ[:,1:length(d₁)-1] += (-τ*k̄ + K_prev*ū₁ - τ*k̄*c₂'*ū₁ - τ*k̄*c₅'*ū₁)*(d₁[2:end])' - A.Eₛ[:,1:end-1] += Eₛ_prev[:,2:end] - τ*k̄*c₃'*Eₛ_prev[:,2:end] - A.Eₛ[:,1:length(c₆)-1] += -τ*k̄*(c₆[2:end])' - A.Eₛ[:,length(d̄):end] .= 0 + A.Eₛ[:,1:length(d̄)-1] = -τ*k̄*(view(d̄, 2:length(d̄)))' + A.Eₛ[:,1:length(d₁)-1] .+= (-τ*k̄ + K_prev*ū₁ - τ*k̄*c₂'*ū₁ - τ*k̄*c₅'*ū₁)*(view(d₁, 2:length(d₁)))' + A.Eₛ[:,1:end-1] .+= view(Eₛ_prev, :, 2:size(Eₛ_prev,2)) - τ*k̄*c₃'*view(Eₛ_prev, :, 2:size(Eₛ_prev,2)) + A.Eₛ[:,1:length(c₆)-1] .+= -τ*k̄*(view(c₆, 2:length(c₆)))' + A.Eₛ[:,length(d̄):end] .= zero(eltype(A)) A.Xₛ[1:length(b),:] = b*(-τ*w̄₁' - τ*f' - τ*c₁' - τ*c₂'*ū₁*w̄₁' - τ*c₄' - τ*c₅'*ū₁*w̄₁') - A.Xₛ[1:end-1,:] += Xₛ_prev[2:end,:] + Yₛ_prev[2:end,:]*ū₁*w̄₁' - A.Xₛ[length(b)+1:end,:] .= 0 + A.Xₛ[1:end-1,:] .+= view(Xₛ_prev, 2:size(Xₛ_prev,1), :) + view(Yₛ_prev, 2:size(Yₛ_prev,1), :)*ū₁*w̄₁' + A.Xₛ[length(b)+1:end,:] .= zero(eltype(A)) A.Yₛ[1:length(b),:] = b*(-τ*k̄' - τ*c₂' - τ*c₅') - A.Yₛ[1:end-1,:] += Yₛ_prev[2:end,:] - A.Yₛ[length(b)+1:end,:] .= 0 - - A.Zₛ[1:length(b), 1:length(d̄)-1] = -τ*b*(d̄[2:end])' - A.Zₛ[1:length(b), 1:length(d₁)-1] += b*(-τ - τ*c₂'*ū₁ - τ*c₅'*ū₁)*(d₁[2:end])' - A.Zₛ[1:length(b), 1:end-1] += -τ*b*c₃'*Eₛ_prev[:,2:end] - A.Zₛ[1:end-1, 1:length(d₁)-1] += Yₛ_prev[2:end,:]*ū₁*(d₁[2:end])' - A.Zₛ[1:end-1, 1:end-1] += Zₛ_prev[2:end, 2:end] - A.Zₛ[1:length(b), 1:length(c₆)-1] += -τ*b*(c₆[2:end])' - A.Zₛ[length(b)+1:end,:] .= 0 - A.Zₛ[:,length(d̄):end] .= 0 + A.Yₛ[1:end-1,:] .+= view(Yₛ_prev, 2:size(Yₛ_prev,1), :) + A.Yₛ[length(b)+1:end,:] .= zero(eltype(A)) + + A.Zₛ[1:length(b), 1:length(d̄)-1] = -τ*b*(view(d̄, 2:length(d̄)))' + A.Zₛ[1:length(b), 1:length(d₁)-1] .+= b*(-τ - τ*c₂'*ū₁ - τ*c₅'*ū₁)*(view(d₁, 2:length(d₁)))' + A.Zₛ[1:length(b), 1:end-1] .+= -τ*b*c₃'*view(Eₛ_prev, :, 2:size(Eₛ_prev, 2)) + A.Zₛ[1:end-1, 1:length(d₁)-1] .+= view(Yₛ_prev, 2:size(Yₛ_prev,1), :)*ū₁*(view(d₁, 2:length(d₁)))' + A.Zₛ[1:end-1, 1:end-1] .+= view(Zₛ_prev, 2:size(Zₛ_prev,1), 2:size(Zₛ_prev,2)) + A.Zₛ[1:length(b), 1:length(c₆)-1] .+= -τ*b*(view(c₆, 2:length(c₆)))' + A.Zₛ[length(b)+1:end,:] .= zero(eltype(A)) + A.Zₛ[:,length(d̄):end] .= zero(eltype(A)) end function update_upper_triangular_part!(A, k̄, b, τ, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q, K, Eₛ, Xₛ, Yₛ, Zₛ, ūw̄_sum, d_extra) j = A.j[] - + p = size(A.W, 2) - size(A.U, 2) # W already been padded to have size n × (p+r) β = (-τ*k̄' + ū₁'*K - τ*c₂' - τ*c₅')' if size(Yₛ, 1) > 0 - β += Yₛ[1,:] + β .+= view(Yₛ, 1, :) end α = (w̄₁' - τ*w̄₁' - τ*f' + ū₁'*Q - τ*c₁' - τ*c₄' + τ*k̄'*ū₁*w̄₁')' if size(Xₛ, 1) > 0 - α += Xₛ[1,:] + α .+= view(Xₛ, 1, :) end - α += (-β'*ūw̄_sum[j+1,:,:])' + α .+= (-β' * view(ūw̄_sum, j+1, :, :))' - d = -τ*d̄[2:end] - d[1:length(d₁)-1] += (1 - τ + τ*k̄'*ū₁)*d₁[2:end] - d[1:min(length(d),size(Eₛ,2)-1)] += ((ū₁' - τ*c₃')*Eₛ[:,2:min(length(d)+1,size(Eₛ,2))])' - d[1:length(c₆)-1] += -τ*c₆[2:end] + d = -τ * d̄[2:end] + d[1:length(d₁)-1] .+= (1 - τ + τ*k̄'*ū₁) * view(d₁, 2:length(d₁)) + d[1:min(length(d),size(Eₛ,2)-1)] .+= ((ū₁' - τ*c₃') * view(Eₛ, :, 2:min(length(d)+1,size(Eₛ,2))))' + d[1:length(c₆)-1] .+= -τ * view(c₆, 2:length(c₆)) if size(Zₛ, 1) > 0 - d[1:min(length(d),size(Zₛ,2)-1)] += Zₛ[1, 2:min(length(d)+1,size(Zₛ,2))] + d[1:min(length(d),size(Zₛ,2)-1)] .+= view(Zₛ, 1, 2:min(length(d)+1,size(Zₛ,2))) end d_extra_current = d_extra[j+1] - d[1:size(d_extra_current,2)] += (-β'*d_extra_current)' + d[1:size(d_extra_current,2)] .+= (-β'*d_extra_current)' j = A.j[] - A.W[j+1, 1:A.p] = α - A.W[j+1, A.p+1:end] = β + A.W[j+1, 1:p] = α + A.W[j+1, p+1:end] = β A.B[j+1, j+2:j+1+length(d)] = d end -function update_lower_triangular_part!(A, p, k̄, b) +function update_lower_triangular_part!(A, pivot, k̄, b) j = A.j[] - A.B[j+1, j+1] = p + A.B[j+1, j+1] = pivot A.B[j+2:j+1+length(b), j+1] = b A.V[j+1, :] = k̄ end function UᵀU_lookup_table(A) - UᵀU = zeros(A.n, A.r, A.r) - UᵀU_current = zeros(A.r, A.r) - for i in A.n:-1:1 - UᵀU_current += A.U[i,:] * (A.U[i,:])' - UᵀU[i,:,:] = UᵀU_current[:,:] + n, r = size(A.U) + UᵀU = zeros(eltype(A), n, r, r) + UᵀU_current = zeros(eltype(A), r, r) + for i in n:-1:1 + UᵀU_current .+= view(A.U, i, :) * (view(A.U, i, :))' + UᵀU[i,:,:] .= UᵀU_current end UᵀU end function ūw̄_sum_lookup_table(A) - ūw̄_sum = zeros(A.n, A.r, A.p) - ūw̄_sum_current = zeros(A.r, A.p) - for t in 1:A.n - ūw̄_sum[t,:,:] = ūw̄_sum_current[:,:] - ūw̄_sum_current[:,:] += A.U[t,:] * (A.W[t,1:A.p])' + n, r = size(A.U) + p = size(A.W, 2) - size(A.U, 2) # W already been padded to have size n × (p+r) + ūw̄_sum = zeros(eltype(A), n, r, p) + ūw̄_sum_current = zeros(eltype(A), r, p) + for t in 1:n + ūw̄_sum[t,:,:] .= ūw̄_sum_current + ūw̄_sum_current .+= view(A.U, t, :) * (view(A.W, t, 1:p))' end ūw̄_sum end function d_extra_lookup_table(A) + n, r = size(A.U) + l, m = bandwidths(A.B) + m = m - l # B already been padded to have upper bandwidth l+m d_extra = Vector{Matrix{eltype(A)}}() - for i in 1:A.n - d_extra_current = zeros(A.r, min(A.m, A.n-i)) - for t in max(1,i+1-A.m) : i-1 - d_extra_current += A.U[t,:] * (A.B[t, i+1:i+size(d_extra_current,2)])' + for i in 1:n + d_extra_current = zeros(eltype(A), r, min(m, n-i)) + for t in max(1,i+1-m) : i-1 + d_extra_current .+= view(A.U, t, :) * (view(A.B, t, i+1:i+size(d_extra_current,2)))' end push!(d_extra, d_extra_current) end @@ -356,17 +371,16 @@ end function fast_UᵀA(U, V, W, S, B, j) # compute U[j,end]ᵀ*A[j:end,j:end] where A = tril(UV',-1) + B + triu(WS',1) in O(n) - n = size(U,1) - r = size(U,2) + n, r = size(U) p = size(W,2) l, m = bandwidths(B) - UᵀA = zeros(r, n+1-j) - UᵀU = (U[j:end,:])'*U[j:end,:] - UᵀW = zeros(r,p) + UᵀA = zeros(eltype(U), r, n+1-j) + UᵀU = (view(U, j:n, :))' * view(U, j:n, :) + UᵀW = zeros(eltype(U), r, p) for i in j:n - UᵀU -= U[i,:] * (U[i,:])' - UᵀA[:,i+1-j] = UᵀU*V[i,:] + (U[max(j,i-m) : min(i+l,n),:])'*B[max(j,i-m) : min(i+l,n),i] + UᵀW*S[i,:] - UᵀW += U[i,:] * (W[i,:])' + UᵀU .-= view(U, i, :) * (view(U, i, :))' + UᵀA[:,i+1-j] = UᵀU*view(V, i, :) + (view(U, max(j,i-m) : min(i+l,n), :))'*view(B, max(j,i-m) : min(i+l,n), i) + UᵀW*view(S, i, :) + UᵀW .+= view(U, i, :) * (view(W, i, :))' end UᵀA end \ No newline at end of file diff --git a/test/test_QR.jl b/test/test_QR.jl index fbbb1e8..57a3ade 100644 --- a/test/test_QR.jl +++ b/test/test_QR.jl @@ -10,7 +10,7 @@ using SemiseparableMatrices: BandedPlusSemiseparableQRPerturbedFactors @testset "BandedPlusSemiseparableQRPerturbedFactors" begin U,V = randn(n,r), randn(n,r) W,S = randn(n,p), randn(n,p) - A = BandedPlusSemiseparableQRPerturbedFactors(U,V,W,S,B) + A = BandedPlusSemiseparableQRPerturbedFactors(B, (U,V), (W,S)) fact_true = LinearAlgebra.qrfactUnblocked!(Matrix(A)) fact = qr!(A) @test A ≈ fact_true.factors ≈ fact.factors From f91df4c74ff6125026ff272b9a35f2a3fa7f709c Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Thu, 20 Nov 2025 11:02:13 +0000 Subject: [PATCH 10/19] Update src/BandedPlusSemiseparableMatrices.jl --- src/BandedPlusSemiseparableMatrices.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BandedPlusSemiseparableMatrices.jl b/src/BandedPlusSemiseparableMatrices.jl index 6fcdc20..e395a0a 100644 --- a/src/BandedPlusSemiseparableMatrices.jl +++ b/src/BandedPlusSemiseparableMatrices.jl @@ -11,7 +11,7 @@ function BandedPlusSemiseparableMatrix(B, (U,V), (W,S)) if size(U,1) == size(V,1) == size(W,1) == size(S,1) == size(B,1) == size(B,2) && size(U,2) == size(V,2) && size(W,2) == size(S,2) BandedPlusSemiseparableMatrix(B, U, V, W, S) else - throw(ErrorException("Dimensions not match!")) +throw(DimensionMismatch("Dimensions are not compatible.")) end end From 165491d8cee89fa5ee4af8371886c9ccfa17e060 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Thu, 20 Nov 2025 11:05:00 +0000 Subject: [PATCH 11/19] Update src/BandedPlusSemiseparableMatrices.jl --- src/BandedPlusSemiseparableMatrices.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BandedPlusSemiseparableMatrices.jl b/src/BandedPlusSemiseparableMatrices.jl index e395a0a..20a0a3b 100644 --- a/src/BandedPlusSemiseparableMatrices.jl +++ b/src/BandedPlusSemiseparableMatrices.jl @@ -136,7 +136,7 @@ function qr!(A::BandedPlusSemiseparableQRPerturbedFactors{T}) where T throw(ErrorException("Matrix has already been partially upper-triangularized")) end - bandedplussemi_qr!(A, zeros(T,size(A.B, 1)), UᵀU_lookup_table(A), ūw̄_sum_lookup_table(A), d_extra_lookup_table(A)) + bandedplussemi_qr!(A, zeros(T,size(A, 1)), UᵀU_lookup_table(A), ūw̄_sum_lookup_table(A), d_extra_lookup_table(A)) end function bandedplussemi_qr!(A, τ, tables...) From 262b792bf172f4101222a82c0803af7bfdf664c5 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Thu, 20 Nov 2025 11:06:17 +0000 Subject: [PATCH 12/19] Update src/BandedPlusSemiseparableMatrices.jl --- src/BandedPlusSemiseparableMatrices.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BandedPlusSemiseparableMatrices.jl b/src/BandedPlusSemiseparableMatrices.jl index 20a0a3b..3741534 100644 --- a/src/BandedPlusSemiseparableMatrices.jl +++ b/src/BandedPlusSemiseparableMatrices.jl @@ -140,7 +140,7 @@ function qr!(A::BandedPlusSemiseparableQRPerturbedFactors{T}) where T end function bandedplussemi_qr!(A, τ, tables...) - n = size(A.B, 1) + n = size(A, 1) for i in 1 : n-1 onestep_qr!(A, τ, tables...) end From 95c460c50d608e1d6fb6a322a9f056399c53c300 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Thu, 20 Nov 2025 11:13:52 +0000 Subject: [PATCH 13/19] Delete test_QR.jl --- test/test_QR.jl | 31 ------------------------------- 1 file changed, 31 deletions(-) delete mode 100644 test/test_QR.jl diff --git a/test/test_QR.jl b/test/test_QR.jl deleted file mode 100644 index 57a3ade..0000000 --- a/test/test_QR.jl +++ /dev/null @@ -1,31 +0,0 @@ -using BandedMatrices, LinearAlgebra, Random, SemiseparableMatrices, Test -using SemiseparableMatrices: BandedPlusSemiseparableQRPerturbedFactors -#Random.seed!(1234) - -@testset "QR" begin - n = 20 - l, m, r, p = 4, 5, 2, 3 - B = brandn(n,n,l,m) - - @testset "BandedPlusSemiseparableQRPerturbedFactors" begin - U,V = randn(n,r), randn(n,r) - W,S = randn(n,p), randn(n,p) - A = BandedPlusSemiseparableQRPerturbedFactors(B, (U,V), (W,S)) - fact_true = LinearAlgebra.qrfactUnblocked!(Matrix(A)) - fact = qr!(A) - @test A ≈ fact_true.factors ≈ fact.factors - @test fact_true.τ ≈ fact.τ - end - - @testset "BandedPlusSemiseparableMatrix" begin - U,V = randn(n,r), randn(n,r) - W,S = randn(n,p), randn(n,p) - A = BandedPlusSemiseparableMatrix(B,(U,V),(W,S)) - A_true = Matrix(A) - fact_true = LinearAlgebra.qrfactUnblocked!(Matrix(A)) - fact = qr(A) - @test A ≈ A_true - @test fact_true.factors ≈ fact.factors - @test fact_true.τ ≈ fact.τ - end -end \ No newline at end of file From 8a6d491c9d0b5dfce228a010d693c7f5862e3306 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Thu, 20 Nov 2025 11:13:59 +0000 Subject: [PATCH 14/19] Create test_qr.jl --- test/test_qr.jl | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 test/test_qr.jl diff --git a/test/test_qr.jl b/test/test_qr.jl new file mode 100644 index 0000000..57a3ade --- /dev/null +++ b/test/test_qr.jl @@ -0,0 +1,31 @@ +using BandedMatrices, LinearAlgebra, Random, SemiseparableMatrices, Test +using SemiseparableMatrices: BandedPlusSemiseparableQRPerturbedFactors +#Random.seed!(1234) + +@testset "QR" begin + n = 20 + l, m, r, p = 4, 5, 2, 3 + B = brandn(n,n,l,m) + + @testset "BandedPlusSemiseparableQRPerturbedFactors" begin + U,V = randn(n,r), randn(n,r) + W,S = randn(n,p), randn(n,p) + A = BandedPlusSemiseparableQRPerturbedFactors(B, (U,V), (W,S)) + fact_true = LinearAlgebra.qrfactUnblocked!(Matrix(A)) + fact = qr!(A) + @test A ≈ fact_true.factors ≈ fact.factors + @test fact_true.τ ≈ fact.τ + end + + @testset "BandedPlusSemiseparableMatrix" begin + U,V = randn(n,r), randn(n,r) + W,S = randn(n,p), randn(n,p) + A = BandedPlusSemiseparableMatrix(B,(U,V),(W,S)) + A_true = Matrix(A) + fact_true = LinearAlgebra.qrfactUnblocked!(Matrix(A)) + fact = qr(A) + @test A ≈ A_true + @test fact_true.factors ≈ fact.factors + @test fact_true.τ ≈ fact.τ + end +end \ No newline at end of file From 88fadc2d2bcea8beabce85795cd9fff0431d474e Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Thu, 20 Nov 2025 12:11:40 +0000 Subject: [PATCH 15/19] Update BandedPlusSemiseparableMatrices.jl --- src/BandedPlusSemiseparableMatrices.jl | 43 ++++++++++++++++---------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/src/BandedPlusSemiseparableMatrices.jl b/src/BandedPlusSemiseparableMatrices.jl index 6fcdc20..210c857 100644 --- a/src/BandedPlusSemiseparableMatrices.jl +++ b/src/BandedPlusSemiseparableMatrices.jl @@ -1,13 +1,13 @@ struct BandedPlusSemiseparableMatrix{T} <: LayoutMatrix{T} # representing B + tril(UV', -1) + triu(WS', 1) - B::BandedMatrix{T} - U::Matrix{T} - V::Matrix{T} - W::Matrix{T} - S::Matrix{T} + B::BandedMatrix{T} + U::Matrix{T} + V::Matrix{T} + W::Matrix{T} + S::Matrix{T} end -function BandedPlusSemiseparableMatrix(B, (U,V), (W,S)) +function BandedPlusSemiseparableMatrix(B, (U,V), (W,S)) if size(U,1) == size(V,1) == size(W,1) == size(S,1) == size(B,1) == size(B,2) && size(U,2) == size(V,2) && size(W,2) == size(S,2) BandedPlusSemiseparableMatrix(B, U, V, W, S) else @@ -19,7 +19,7 @@ size(A::BandedPlusSemiseparableMatrix) = size(A.B) copy(A::BandedPlusSemiseparableMatrix) = A # not mutable function getindex(A::BandedPlusSemiseparableMatrix, k::Integer, j::Integer) - if j > k + if j > k view(A.W, k, :)' * view(A.S, j, :) + A.B[k,j] elseif k > j view(A.U, k, :)' * view(A.V, j, :) + A.B[k,j] @@ -68,7 +68,7 @@ function BandedPlusSemiseparableQRPerturbedFactors(B, (U,V), (W,S)) n, r = size(U) p = size(W,2) l, m = bandwidths(B) - A = tril(U*V',-1) + B + triu(W*S',1) + # A = tril(U*V',-1) + B + triu(W*S',1) AᵀU = (fast_UᵀA(U, V, W, S, B, 1))' BandedPlusSemiseparableQRPerturbedFactors(BandedMatrix(B,(l,l+m)),U,V,[W zeros(n,r)],[S AᵀU], zeros(r,p),zeros(r,r),zeros(r,min(l+m,n)),zeros(min(l,n),p),zeros(min(l,n),r),zeros(min(l,n),min(l+m,n)),Ref(0)) @@ -117,7 +117,7 @@ function getindex(A::BandedPlusSemiseparableQRPerturbedFactors, k::Integer, i::I elseif i <= j + l + m A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) + view(A.U, k, :)' * view(A.Eₛ, :, i-j) else - A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) + A.B[k,i] + UQ * view(A.S, i, 1:p) + UK * view(AᵀU, i-j, :) end end else @@ -167,7 +167,7 @@ function onestep_qr!(A, τ, UᵀU, ūw̄_sum, d_extra) Xₛ_prev = copy(A.Xₛ) Yₛ_prev = copy(A.Yₛ) Zₛ_prev = copy(A.Zₛ) - + update_next_submatrix!(A, k̄, b, τ_current, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev) update_upper_triangular_part!(A, k̄, b, τ_current, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev, ūw̄_sum, d_extra) update_lower_triangular_part!(A, pivot_new, k̄, b) @@ -207,7 +207,7 @@ function compute_Householder_vector(A, UᵀU) # compute the length square of A[j+1:end,j+1] len_square = pivot^2 + k̄' * view(UᵀU, j+2, :, :) * k̄ if l > 0 - len_square += k̄' * (view(A.U, j+2:j+1+length(b), :))' * b + b' * view(A.U, j+2:j+1+length(b), :) * k̄ + b'b + len_square += k̄' * (view(A.U, j+2:j+1+length(b), :))' * b + b' * view(A.U, j+2:j+1+length(b), :) * k̄ + b'b end pivot_new = -sign(pivot) * sqrt(len_square) # the element on the diagonal after HT @@ -263,10 +263,21 @@ function compute_variables_c(A, k̄, b, UᵀU) c₁, c₂, c₃, c₄, c₅, c₆ end -function update_next_submatrix!(A, k̄, b, τ, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev) - A.Q[:,:] = -τ*k̄*w̄₁' - τ*k̄*f' + Q_prev - τ*k̄*c₁' + K_prev*ū₁*w̄₁'- - τ*k̄*c₂'*ū₁*w̄₁' - τ*k̄*c₄' - τ*k̄*c₅'*ū₁*w̄₁' - A.K[:,:] = -τ*k̄*k̄' + K_prev - τ*k̄*c₂' - τ*k̄*c₅' +function update_next_submatrix!(A::AbstractMatrix{T}, k̄, b, τ, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev) where T + # A.Q .= -τ*k̄*w̄₁' - τ*k̄*f' + Q_prev - τ*k̄*c₁' + K_prev*ū₁*w̄₁'- + # τ*k̄*c₂'*ū₁*w̄₁' - τ*k̄*c₄' - τ*k̄*c₅'*ū₁*w̄₁' + mul!(A.Q, k̄, w̄₁', -τ, one(T)) + mul!(A.Q, k̄, f', -τ, one(T)) + mul!(A.Q, k̄, c₁', -τ, one(T)) + mul!(A.Q, K_prev, ū₁*w̄₁', one(T), one(T)) # TODO: write to tem buffer? + mul!(A.Q, k̄, w̄₁', -τ*(c₂'*ū₁+c₅'*ū₁), one(T)) + mul!(A.Q, k̄, c₄', -τ, one(T)) + + + + + + A.K .= -τ*k̄*k̄' + K_prev - τ*k̄*c₂' - τ*k̄*c₅' A.Eₛ[:,1:length(d̄)-1] = -τ*k̄*(view(d̄, 2:length(d̄)))' A.Eₛ[:,1:length(d₁)-1] .+= (-τ*k̄ + K_prev*ū₁ - τ*k̄*c₂'*ū₁ - τ*k̄*c₅'*ū₁)*(view(d₁, 2:length(d₁)))' @@ -331,7 +342,7 @@ function update_lower_triangular_part!(A, pivot, k̄, b) end function UᵀU_lookup_table(A) - n, r = size(A.U) + n, r = size(A.U) UᵀU = zeros(eltype(A), n, r, r) UᵀU_current = zeros(eltype(A), r, r) for i in n:-1:1 From d51520c6c8d7c6373190ef12d0016dd9712b8ec9 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Thu, 20 Nov 2025 15:44:19 +0000 Subject: [PATCH 16/19] start ldiv! and lmul! for fast QR solves --- src/BandedPlusSemiseparableMatrix.jl | 33 +++++++++++ src/SemiseparableMatrices.jl | 7 ++- ...eparableMatrices.jl => semiseparableqr.jl} | 56 +++++++++---------- test/test_bandedplussemiseparable.jl | 9 ++- test/test_qr.jl | 9 ++- 5 files changed, 78 insertions(+), 36 deletions(-) create mode 100644 src/BandedPlusSemiseparableMatrix.jl rename src/{BandedPlusSemiseparableMatrices.jl => semiseparableqr.jl} (94%) diff --git a/src/BandedPlusSemiseparableMatrix.jl b/src/BandedPlusSemiseparableMatrix.jl new file mode 100644 index 0000000..c8f6ad1 --- /dev/null +++ b/src/BandedPlusSemiseparableMatrix.jl @@ -0,0 +1,33 @@ +struct BandedPlusSemiseparableMatrix{T} <: LayoutMatrix{T} + # representing B + tril(UV', -1) + triu(WS', 1) + B::BandedMatrix{T, Matrix{T}, Base.OneTo{Int}} + U::Matrix{T} + V::Matrix{T} + W::Matrix{T} + S::Matrix{T} +end + +function BandedPlusSemiseparableMatrix(B, (U,V), (W,S)) + if size(U,1) == size(V,1) == size(W,1) == size(S,1) == size(B,1) == size(B,2) && size(U,2) == size(V,2) && size(W,2) == size(S,2) + BandedPlusSemiseparableMatrix(B, U, V, W, S) + else +throw(DimensionMismatch("Dimensions are not compatible.")) + end +end + +size(A::BandedPlusSemiseparableMatrix) = size(A.B) +copy(A::BandedPlusSemiseparableMatrix) = A # not mutable + +function getindex(A::BandedPlusSemiseparableMatrix, k::Integer, j::Integer) + if j > k + view(A.W, k, :)' * view(A.S, j, :) + A.B[k,j] + elseif k > j + view(A.U, k, :)' * view(A.V, j, :) + A.B[k,j] + else + A.B[k,j] + end +end + +function ldiv!(R::UpperTriangular{<:Any,<:BandedPlusSemiseparableMatrix}, b::StridedVector) + error("implement fast back substitution") +end \ No newline at end of file diff --git a/src/SemiseparableMatrices.jl b/src/SemiseparableMatrices.jl index 7d43af1..28c8b8b 100644 --- a/src/SemiseparableMatrices.jl +++ b/src/SemiseparableMatrices.jl @@ -2,9 +2,9 @@ module SemiseparableMatrices using LinearAlgebra: BlasFloat using ArrayLayouts, BandedMatrices, LazyArrays, LinearAlgebra, MatrixFactorizations, Base -import Base: size, getindex, setindex!, convert, copyto!, copy, axes +import Base: size, getindex, setindex!, convert, copyto!, copy, axes, getproperty import MatrixFactorizations: QR, QRPackedQ, getQ, getR, QRPackedQLayout, AdjQRPackedQLayout -import LinearAlgebra: qr, qr!, lmul!, ldiv!, rmul!, triu!, factorize, rank +import LinearAlgebra: qr, qr!, lmul!, ldiv!, rmul!, triu!, factorize, rank, AdjointQ import BandedMatrices: _banded_qr!, bandeddata, resize import LazyArrays: arguments, applylayout, _cache, CachedArray, CachedMatrix, ApplyLayout, resizedata!, PaddedRows import ArrayLayouts: MemoryLayout, sublayout, sub_materialize, MatLdivVec, materialize!, triangularlayout, @@ -30,6 +30,7 @@ separablerank(A) = size(arguments(ApplyLayout{typeof(*)}(),A)[1],2) include("SemiseparableMatrix.jl") include("AlmostBandedMatrix.jl") include("invbanded.jl") -include("BandedPlusSemiseparableMatrices.jl") +include("BandedPlusSemiseparableMatrix.jl") +include("semiseparableqr.jl") end # module diff --git a/src/BandedPlusSemiseparableMatrices.jl b/src/semiseparableqr.jl similarity index 94% rename from src/BandedPlusSemiseparableMatrices.jl rename to src/semiseparableqr.jl index 76d6870..79b8678 100644 --- a/src/BandedPlusSemiseparableMatrices.jl +++ b/src/semiseparableqr.jl @@ -1,33 +1,3 @@ -struct BandedPlusSemiseparableMatrix{T} <: LayoutMatrix{T} - # representing B + tril(UV', -1) + triu(WS', 1) - B::BandedMatrix{T} - U::Matrix{T} - V::Matrix{T} - W::Matrix{T} - S::Matrix{T} -end - -function BandedPlusSemiseparableMatrix(B, (U,V), (W,S)) - if size(U,1) == size(V,1) == size(W,1) == size(S,1) == size(B,1) == size(B,2) && size(U,2) == size(V,2) && size(W,2) == size(S,2) - BandedPlusSemiseparableMatrix(B, U, V, W, S) - else -throw(DimensionMismatch("Dimensions are not compatible.")) - end -end - -size(A::BandedPlusSemiseparableMatrix) = size(A.B) -copy(A::BandedPlusSemiseparableMatrix) = A # not mutable - -function getindex(A::BandedPlusSemiseparableMatrix, k::Integer, j::Integer) - if j > k - view(A.W, k, :)' * view(A.S, j, :) + A.B[k,j] - elseif k > j - view(A.U, k, :)' * view(A.V, j, :) + A.B[k,j] - else - A.B[k,j] - end -end - """ Represents factors matrix for QR for banded+semiseparable. we have @@ -42,7 +12,7 @@ Represents factors matrix for QR for banded+semiseparable. we have """ struct BandedPlusSemiseparableQRPerturbedFactors{T} <: LayoutMatrix{T} - B::BandedMatrix{T} # lower bandwidth l and upper bandwidth l + m + B::BandedMatrix{T, Matrix{T}, Base.OneTo{Int}} # lower bandwidth l and upper bandwidth l + m U::Matrix{T} # n × r V::Matrix{T} # n × r W::Matrix{T} # n × (p+r) @@ -394,4 +364,28 @@ function fast_UᵀA(U, V, W, S, B, j) UᵀW .+= view(U, i, :) * (view(W, i, :))' end UᵀA +end + + + +### +# Support ldiv! +### + +function getproperty(F::QR{<:Any,<:BandedPlusSemiseparableMatrix}, d::Symbol) + m, n = size(F) + if d === :R + return UpperTriangular(getfield(F, :factors)) + elseif d === :Q + return QRPackedQ(getfield(F, :factors), F.τ) + else + getfield(F, d) + end +end + +function lmul!(adjQ::AdjointQ{<:Any,<:QRPackedQ{<:Any,<:BandedPlusSemiseparableMatrix}}, B::StridedVector) + Q = parent(adjQ) + factors = Q.factors + τ = Q.τ + error("implement") end \ No newline at end of file diff --git a/test/test_bandedplussemiseparable.jl b/test/test_bandedplussemiseparable.jl index 20d2d2d..67b24a3 100644 --- a/test/test_bandedplussemiseparable.jl +++ b/test/test_bandedplussemiseparable.jl @@ -6,5 +6,12 @@ using SemiseparableMatrices, Test B = brandn(n,n,l,m) U,V = randn(n,r), randn(n,r) W,S = randn(n,p), randn(n,p) - @test BandedPlusSemiseparableMatrix(B, (U,V), (W,S)) ≈ B + tril(U*V',-1) + triu(W*S',1) + A = BandedPlusSemiseparableMatrix(B, (U,V), (W,S)) + @test @inferred(size(A)) == (20,20) + @test A ≈ B + tril(U*V',-1) + triu(W*S',1) + + @test_broken A[1:10,1:10] isa BandedPlusSemiseparableMatrix + + b = randn(n) + @test ldiv!(UpperTriangular(A), copy(b)) ≈ UpperTriangular(Matrix(A)) \ b end \ No newline at end of file diff --git a/test/test_qr.jl b/test/test_qr.jl index 57a3ade..be3b4d3 100644 --- a/test/test_qr.jl +++ b/test/test_qr.jl @@ -11,8 +11,9 @@ using SemiseparableMatrices: BandedPlusSemiseparableQRPerturbedFactors U,V = randn(n,r), randn(n,r) W,S = randn(n,p), randn(n,p) A = BandedPlusSemiseparableQRPerturbedFactors(B, (U,V), (W,S)) + @test @inferred(size(A)) == (20,20) fact_true = LinearAlgebra.qrfactUnblocked!(Matrix(A)) - fact = qr!(A) + fact = @inferred(qr!(A)) @test A ≈ fact_true.factors ≈ fact.factors @test fact_true.τ ≈ fact.τ end @@ -27,5 +28,11 @@ using SemiseparableMatrices: BandedPlusSemiseparableQRPerturbedFactors @test A ≈ A_true @test fact_true.factors ≈ fact.factors @test fact_true.τ ≈ fact.τ + + b = randn(n) + Q,R = fact + + lmul!(Q',b) + ldiv!(R, b) end end \ No newline at end of file From 24bc3fdff1f5d6643b1186e8b74146b23c968e03 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Thu, 20 Nov 2025 15:46:19 +0000 Subject: [PATCH 17/19] Update test_qr.jl --- test/test_qr.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_qr.jl b/test/test_qr.jl index be3b4d3..ee50a4a 100644 --- a/test/test_qr.jl +++ b/test/test_qr.jl @@ -32,6 +32,8 @@ using SemiseparableMatrices: BandedPlusSemiseparableQRPerturbedFactors b = randn(n) Q,R = fact + @test R ≡ UpperTriangular(fact.factors) + lmul!(Q',b) ldiv!(R, b) end From 5a315f7b29fc6548bc2248d9cdc7340e031cb29e Mon Sep 17 00:00:00 2001 From: TaoChenImperial Date: Wed, 3 Dec 2025 12:29:37 +0000 Subject: [PATCH 18/19] support lmul and ldiv for bps matrix and improve qr efficiency by using mul! --- src/BandedPlusSemiseparableMatrix.jl | 15 +- src/semiseparableqr.jl | 351 ++++++++++++++++++++------- test/test_qr.jl | 18 +- 3 files changed, 287 insertions(+), 97 deletions(-) diff --git a/src/BandedPlusSemiseparableMatrix.jl b/src/BandedPlusSemiseparableMatrix.jl index c8f6ad1..914782b 100644 --- a/src/BandedPlusSemiseparableMatrix.jl +++ b/src/BandedPlusSemiseparableMatrix.jl @@ -29,5 +29,18 @@ function getindex(A::BandedPlusSemiseparableMatrix, k::Integer, j::Integer) end function ldiv!(R::UpperTriangular{<:Any,<:BandedPlusSemiseparableMatrix}, b::StridedVector) - error("implement fast back substitution") + F = parent(R) + n, p = size(F.W) + l, m = bandwidths(F.B) + T = eltype(F.S) + sx = zeros(T, p) + for j = n : -1 : 1 + residual = view(F.W, j, :)' * sx + for k = j+1 : min(j+m, n) + residual += F.B[j, k] * b[k] + end + b[j] = (b[j] - residual) / F.B[j, j] + #sx += F.S[j, :] * b[j] + mul!(sx, I, view(F.S, j, :), b[j], one(T)) + end end \ No newline at end of file diff --git a/src/semiseparableqr.jl b/src/semiseparableqr.jl index 79b8678..e2112ca 100644 --- a/src/semiseparableqr.jl +++ b/src/semiseparableqr.jl @@ -38,12 +38,12 @@ function BandedPlusSemiseparableQRPerturbedFactors(B, (U,V), (W,S)) n, r = size(U) p = size(W,2) l, m = bandwidths(B) - # A = tril(U*V',-1) + B + triu(W*S',1) + # A is tril(U*V',-1) + B + triu(W*S',1) AᵀU = (fast_UᵀA(U, V, W, S, B, 1))' BandedPlusSemiseparableQRPerturbedFactors(BandedMatrix(B,(l,l+m)),U,V,[W zeros(n,r)],[S AᵀU], zeros(r,p),zeros(r,r),zeros(r,min(l+m,n)),zeros(min(l,n),p),zeros(min(l,n),r),zeros(min(l,n),min(l+m,n)),Ref(0)) else - throw(ErrorException("Dimensions not match!")) + throw(DimensionMismatch("Dimensions are not compatible.")) end end @@ -105,8 +105,13 @@ function qr!(A::BandedPlusSemiseparableQRPerturbedFactors{T}) where T if A.j[] != 0 throw(ErrorException("Matrix has already been partially upper-triangularized")) end - - bandedplussemi_qr!(A, zeros(T,size(A, 1)), UᵀU_lookup_table(A), ūw̄_sum_lookup_table(A), d_extra_lookup_table(A)) + Q_prev = similar(A.Q) + K_prev = similar(A.K) + Eₛ_prev = similar(A.Eₛ) + Xₛ_prev = similar(A.Xₛ) + Yₛ_prev = similar(A.Yₛ) + Zₛ_prev = similar(A.Zₛ) + bandedplussemi_qr!(A, zeros(T,size(A, 1)), UᵀU_lookup_table(A), ūw̄_sum_lookup_table(A), d_extra_lookup_table(A), Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev) end function bandedplussemi_qr!(A, τ, tables...) @@ -115,7 +120,7 @@ function bandedplussemi_qr!(A, τ, tables...) onestep_qr!(A, τ, tables...) end - A.B[n,n] = A[n,n] + A.B[n,n] = A[n,n] # after n-1 HT the lower right 1 by 1 matrix is still represented by Q, K, etc. Simply get this value and assign it to the diagonal so the whole matrix is BPS now. A.j[] += 1 QR(A, τ) end @@ -125,18 +130,18 @@ function qr(A::BandedPlusSemiseparableMatrix) QR(BandedPlusSemiseparableMatrix(F.factors), F.τ) end -function onestep_qr!(A, τ, UᵀU, ūw̄_sum, d_extra) +function onestep_qr!(A, τ, UᵀU, ūw̄_sum, d_extra, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev) k̄, b, pivot_new, τ_current = compute_Householder_vector(A, UᵀU) # I- τyy' where y = eⱼ₊₁ + U⁽ʲ⁺²⁾k̄ + b and p_new will be the diagonal element on the current column τ[A.j[]+1] = τ_current w̄₁, ū₁, d₁, f, d̄ = compute_d₁_and_d̄(A, b) c₁, c₂, c₃, c₄, c₅, c₆ = compute_variables_c(A, k̄, b, UᵀU) - Q_prev = copy(A.Q) - K_prev = copy(A.K) - Eₛ_prev = copy(A.Eₛ) - Xₛ_prev = copy(A.Xₛ) - Yₛ_prev = copy(A.Yₛ) - Zₛ_prev = copy(A.Zₛ) + Q_prev .= A.Q + K_prev .= A.K + Eₛ_prev .= A.Eₛ + Xₛ_prev .= A.Xₛ + Yₛ_prev .= A.Yₛ + Zₛ_prev .= A.Zₛ update_next_submatrix!(A, k̄, b, τ_current, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev) update_upper_triangular_part!(A, k̄, b, τ_current, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev, ūw̄_sum, d_extra) @@ -153,20 +158,32 @@ function compute_Householder_vector(A, UᵀU) # first express A[j+1:end,j+1] as p*eⱼ₊₁ + U⁽ʲ⁺²⁾k̄ + b j = A.j[] - n = size(A.B, 1) + n = size(A, 1) p = size(A.W, 2) - size(A.U, 2) # W already been padded to have size n × (p+r) l, m = bandwidths(A.B) m = m - l # B already been padded to have upper bandwidth l+m - UᵀA_1 = (view(A.U, j+1:min(l+j+1, n),:))' * view(A.B, j+1:min(l+j+1, n) , j+1) + view(UᵀU, j+2 , : , :) * view(A.V, j+1, :) #the j+1th column of UᵀA - - k̄ = view(A.V, j+1, :) + A.Q * view(A.S, j+1, 1:p) + A.K * UᵀA_1 + T = eltype(A) + # UᵀA_1 = (A.U[j+1:min(A.l+j+1,A.n),:])'*A.B[j+1:min(A.l+j+1,A.n),j+1] + UᵀU[j+2,:,:]*A.V[j+1,:], the j+1th column of UᵀA + UᵀA_1 = (view(A.U, j+1:min(l+j+1, n),:))' * view(A.B, j+1:min(l+j+1, n) , j+1) # matrix allocation + mul!(UᵀA_1, view(UᵀU, j+2 , : , :), view(A.V, j+1, :), one(T), one(T)) + + #k̄ = A.V[j+1,:] + A.Q * A.S[j+1,1:A.p] + A.K * UᵀA_1 + k̄ = A.K * UᵀA_1 # matrix accolation + mul!(k̄, A.Q, view(A.S, j+1, 1:p), one(T), one(T)) + k̄ .+= view(A.V, j+1, :) if l > 0 || m > 0 k̄ .+= view(A.Eₛ, :, 1) end - b = A.B[j+2:min(l+j+1, n), j+1] + b = A.B[j+2:min(l+j+1, n), j+1] # matrix allocation if l > 0 || m > 0 - b[1:min(l-1,length(b))] .+= view((A.Xₛ * view(A.S, j+1, 1:p) + A.Yₛ * UᵀA_1 + view(A.Zₛ, :, 1)), 2:min(l,length(b)+1)) + #b[1:min(A.l-1,length(b))] += (A.Xₛ * A.S[j+1,1:A.p] + A.Yₛ * UᵀA_1 + A.Zₛ[:,1])[2:min(A.l,length(b)+1)] + kr = 2:min(l,length(b)+1) + v = view(b, kr .- 1) + mul!(v, view(A.Xₛ, kr,:), view(A.S, j+1, 1:p), one(T), one(T)) + mul!(v, view(A.Yₛ,kr,:), UᵀA_1, one(T), one(T)) + v .+= view(A.Zₛ, kr, 1) end + #pivot = A.B[j+1,j+1] + A.U[j+1,:]'A.Q * A.S[j+1,1:A.p] + A.U[j+1,:]'A.K*UᵀA_1 pivot = A.B[j+1,j+1] + (view(A.U, j+1, :))' * A.Q * view(A.S, j+1, 1:p) + view(A.U, j+1, :)' * A.K * UᵀA_1 if l > 0 pivot += (view(A.U, j+1 , :))' * view(A.Eₛ, :, 1) + (view(A.Xₛ, 1 , :))' * view(A.S, j+1 , 1:p) + (view(A.Yₛ, 1, :))' * UᵀA_1 + A.Zₛ[1,1] @@ -189,53 +206,82 @@ end function compute_d₁_and_d̄(A, b) j = A.j[] - n = size(A.B, 1) + n = size(A, 1) p = size(A.W, 2) - size(A.U, 2) # W already been padded to have size n × (p+r) l, m = bandwidths(A.B) m = m - l # B already been padded to have upper bandwidth l+m + T = eltype(A) w̄₁ = view(A.W, j+1, 1:p) ū₁ = view(A.U, j+1, :) - d₁ = A.B[j+1,j+1:min(j+1+m, n)] + d₁ = A.B[j+1,j+1:min(j+1+m, n)] # matrix allocation d₁[1] = d₁[1] - w̄₁'*(view(A.S, j+1, 1:p)) - f = (view(A.W, j+2:j+1+length(b), 1:p))' * b + f = (view(A.W, j+2:j+1+length(b), 1:p))' * b # matrix allocation index1 = j+2:j+1+length(b) index2 = j+1:min(j+1+m+l, n) tril_sub = [ (ii - jj) >= 1 for ii in index1, jj in index2 ] triu_sub = [ (jj - ii) >= 1 for ii in index1, jj in index2 ] - A₀ = view(A.U, index1, :) * view(A.V, index2, :)' .* tril_sub + view(A.B, index1, index2) + view(A.W, index1, 1:p) * view(A.S, index2, 1:p)' .* triu_sub - d̄ = (b'A₀ - f'*(view(A.S, index2, 1:p))')' + #A₀ = A.U[index1,:]*A.V[index2,:]'.*tril_sub + A.B[index1,index2] + A.W[index1,1:A.p]*A.S[index2,1:A.p]'.*triu_sub + A₀ = view(A.U, index1, :) * view(A.V, index2, :)' # matrix allocation + A₀ .*= tril_sub + A₀ .+= view(A.B, index1, index2) + A₀_up = view(A.W, index1, 1:p) * view(A.S, index2, 1:p)' # matrix allocation + A₀_up .*= triu_sub + A₀ .+= A₀_up + #d̄ = (b'A₀ - f'*(A.S[index2,1:A.p])')' + d̄ = A₀'b # matrix allocation + mul!(d̄, view(A.S, index2, 1:p), f, -one(T), one(T)) w̄₁, ū₁, d₁, f, d̄ end function compute_variables_c(A, k̄, b, UᵀU) j = A.j[] - n = size(A.B, 1) + n = size(A, 1) l, m = bandwidths(A.B) m = m - l # B already been padded to have upper bandwidth l+m + T = eltype(A) UᵀU⁽²⁾ = view(UᵀU, j+2, :, :) - c₁ = (A.Q)' * view(A.U, j+1, :) + (A.Q)' * UᵀU⁽²⁾ * k̄ + (A.Q)' * (view(A.U, j+2:j+1+length(b), :))' * b - c₂ = (A.K)' * view(A.U, j+1, :) + (A.K)' * UᵀU⁽²⁾ * k̄ + (A.K)' * (view(A.U, j+2:j+1+length(b), :))' * b - c₃ = view(A.U, j+1, :) + UᵀU⁽²⁾ * k̄ + (view(A.U, j+2:j+1+length(b), :))' * b + #c₁ = (A.Q)' * A.U[j+1,:] + (A.Q)' * UᵀU⁽²⁾ * k̄ + (A.Q)' * (A.U[j+2:j+1+length(b),:])' * b + c₁ = (A.Q)' * view(A.U, j+1, :) # matrix allocation + mul!(c₁, (A.Q)' * UᵀU⁽²⁾, k̄, one(T), one(T)) + mul!(c₁, (A.Q)' * (view(A.U, j+2:j+1+length(b), :))', b, one(T), one(T)) + #c₂ = (A.K)' * A.U[j+1,:] + (A.K)' * UᵀU⁽²⁾ * k̄ + (A.K)' * (A.U[j+2:j+1+length(b),:])' * b + c₂ = (A.K)' * view(A.U, j+1, :) # matrix allocation + mul!(c₂, (A.K)' * UᵀU⁽²⁾, k̄, one(T), one(T)) + mul!(c₂, (A.K)' * (view(A.U, j+2:j+1+length(b), :))', b, one(T), one(T)) + #c₃ = A.U[j+1,:] + UᵀU⁽²⁾ * k̄ + (A.U[j+2:j+1+length(b),:])' * b + c₃ = UᵀU⁽²⁾ * k̄ # matrix allocation + c₃ .+= view(A.U, j+1, :) + mul!(c₃, view(A.U, j+2:j+1+length(b), :)', b, one(T), one(T)) Xₛ_valid = view(A.Xₛ, 2:min(l, n-j), :) Yₛ_valid = view(A.Yₛ, 2:min(l, n-j), :) Zₛ_valid = view(A.Zₛ, 2:min(l, n-j), 1:min(l+m, n-j)) - c₄ = (Xₛ_valid)' * view(A.U, j+2:j+1+size(Xₛ_valid,1), :) * k̄ + (Xₛ_valid)' * view(b, 1:size(Xₛ_valid,1)) - c₅ = (Yₛ_valid)' * view(A.U, j+2:j+1+size(Yₛ_valid,1), :) * k̄ + (Yₛ_valid)' * view(b, 1:size(Yₛ_valid,1)) - c₆ = (Zₛ_valid)' * view(A.U, j+2:j+1+size(Zₛ_valid,1), :) * k̄ + (Zₛ_valid)' * view(b, 1:size(Zₛ_valid,1)) + #c₄ = (Xₛ_valid)' * A.U[j+2:j+1+size(Xₛ_valid,1),:] * k̄ + (Xₛ_valid)' * b[1:size(Xₛ_valid,1)] + c₄ = (Xₛ_valid)' * view(b, 1:size(Xₛ_valid,1)) # matrix allocation + mul!(c₄, (Xₛ_valid)' * view(A.U, j+2:j+1+size(Xₛ_valid,1), :), k̄, one(T), one(T)) + #c₅ = (Yₛ_valid)' * A.U[j+2:j+1+size(Yₛ_valid,1),:] * k̄ + (Yₛ_valid)' * b[1:size(Yₛ_valid,1)] + c₅ = (Yₛ_valid)' * view(b, 1:size(Yₛ_valid,1)) # matrix allocation + mul!(c₅, (Yₛ_valid)' * view(A.U, j+2:j+1+size(Yₛ_valid,1), :), k̄, one(T), one(T)) + #c₆ = (Zₛ_valid)' * A.U[j+2:j+1+size(Zₛ_valid,1),:] * k̄ + (Zₛ_valid)' * b[1:size(Zₛ_valid,1)] + c₆ = (Zₛ_valid)' * view(b, 1:size(Zₛ_valid,1)) # matrix allocation + mul!(c₆, (Zₛ_valid)' * view(A.U, j+2:j+1+size(Zₛ_valid,1), :), k̄, one(T), one(T)) if l > 0 + #c₄ += A.Xₛ[1,:] c₄ .+= view(A.Xₛ, 1, :) + #c₅ += A.Yₛ[1,:] c₅ .+= view(A.Yₛ, 1, :) + #c₆ += A.Zₛ[1,1:min(A.l+A.m, A.n-j)] c₆ .+= view(A.Zₛ, 1, 1:min(l+m, n-j)) end c₁, c₂, c₃, c₄, c₅, c₆ end -function update_next_submatrix!(A::AbstractMatrix{T}, k̄, b, τ, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev) where T +function update_next_submatrix!(A, k̄, b, τ, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q_prev, K_prev, Eₛ_prev, Xₛ_prev, Yₛ_prev, Zₛ_prev) # A.Q .= -τ*k̄*w̄₁' - τ*k̄*f' + Q_prev - τ*k̄*c₁' + K_prev*ū₁*w̄₁'- # τ*k̄*c₂'*ū₁*w̄₁' - τ*k̄*c₄' - τ*k̄*c₅'*ū₁*w̄₁' + T = eltype(A) mul!(A.Q, k̄, w̄₁', -τ, one(T)) mul!(A.Q, k̄, f', -τ, one(T)) mul!(A.Q, k̄, c₁', -τ, one(T)) @@ -243,81 +289,138 @@ function update_next_submatrix!(A::AbstractMatrix{T}, k̄, b, τ, w̄₁, ū₁ mul!(A.Q, k̄, w̄₁', -τ*(c₂'*ū₁+c₅'*ū₁), one(T)) mul!(A.Q, k̄, c₄', -τ, one(T)) - - - - - A.K .= -τ*k̄*k̄' + K_prev - τ*k̄*c₂' - τ*k̄*c₅' - - A.Eₛ[:,1:length(d̄)-1] = -τ*k̄*(view(d̄, 2:length(d̄)))' - A.Eₛ[:,1:length(d₁)-1] .+= (-τ*k̄ + K_prev*ū₁ - τ*k̄*c₂'*ū₁ - τ*k̄*c₅'*ū₁)*(view(d₁, 2:length(d₁)))' - A.Eₛ[:,1:end-1] .+= view(Eₛ_prev, :, 2:size(Eₛ_prev,2)) - τ*k̄*c₃'*view(Eₛ_prev, :, 2:size(Eₛ_prev,2)) - A.Eₛ[:,1:length(c₆)-1] .+= -τ*k̄*(view(c₆, 2:length(c₆)))' - A.Eₛ[:,length(d̄):end] .= zero(eltype(A)) - - A.Xₛ[1:length(b),:] = b*(-τ*w̄₁' - τ*f' - τ*c₁' - τ*c₂'*ū₁*w̄₁' - τ*c₄' - τ*c₅'*ū₁*w̄₁') - A.Xₛ[1:end-1,:] .+= view(Xₛ_prev, 2:size(Xₛ_prev,1), :) + view(Yₛ_prev, 2:size(Yₛ_prev,1), :)*ū₁*w̄₁' - A.Xₛ[length(b)+1:end,:] .= zero(eltype(A)) - - A.Yₛ[1:length(b),:] = b*(-τ*k̄' - τ*c₂' - τ*c₅') - A.Yₛ[1:end-1,:] .+= view(Yₛ_prev, 2:size(Yₛ_prev,1), :) - A.Yₛ[length(b)+1:end,:] .= zero(eltype(A)) - - A.Zₛ[1:length(b), 1:length(d̄)-1] = -τ*b*(view(d̄, 2:length(d̄)))' - A.Zₛ[1:length(b), 1:length(d₁)-1] .+= b*(-τ - τ*c₂'*ū₁ - τ*c₅'*ū₁)*(view(d₁, 2:length(d₁)))' - A.Zₛ[1:length(b), 1:end-1] .+= -τ*b*c₃'*view(Eₛ_prev, :, 2:size(Eₛ_prev, 2)) - A.Zₛ[1:end-1, 1:length(d₁)-1] .+= view(Yₛ_prev, 2:size(Yₛ_prev,1), :)*ū₁*(view(d₁, 2:length(d₁)))' - A.Zₛ[1:end-1, 1:end-1] .+= view(Zₛ_prev, 2:size(Zₛ_prev,1), 2:size(Zₛ_prev,2)) - A.Zₛ[1:length(b), 1:length(c₆)-1] .+= -τ*b*(view(c₆, 2:length(c₆)))' - A.Zₛ[length(b)+1:end,:] .= zero(eltype(A)) - A.Zₛ[:,length(d̄):end] .= zero(eltype(A)) + # A.K .= -τ*k̄*k̄' + K_prev - τ*k̄*c₂' - τ*k̄*c₅' + mul!(A.K, k̄, k̄', -τ, one(T)) + mul!(A.K, k̄, c₂', -τ, one(T)) + mul!(A.K, k̄, c₅', -τ, one(T)) + + #A.Eₛ[:,1:length(d̄)-1] = -τ*k̄*(d̄[2:end])' + mul!(view(A.Eₛ, :, 1:length(d̄)-1), k̄, view(d̄, 2:length(d̄))', -τ, zero(T)) + #A.Eₛ[:,1:length(d₁)-1] += (-τ*k̄ + K_prev*ū₁ - τ*k̄*c₂'*ū₁ - τ*k̄*c₅'*ū₁)*(d₁[2:end])' + mul!(view(A.Eₛ, :, 1:length(d₁)-1), k̄, view(d₁, 2:length(d₁))', -τ, one(T)) + mul!(view(A.Eₛ, :, 1:length(d₁)-1), K_prev*ū₁, view(d₁, 2:length(d₁))', one(T), one(T)) + mul!(view(A.Eₛ, :, 1:length(d₁)-1), k̄*c₂'*ū₁, view(d₁, 2:length(d₁))', -τ, one(T)) + mul!(view(A.Eₛ, :, 1:length(d₁)-1), k̄*c₅'*ū₁, view(d₁, 2:length(d₁))', -τ, one(T)) + #A.Eₛ[:,1:end-1] += Eₛ_prev[:,2:end] - τ*k̄*c₃'*Eₛ_prev[:,2:end] + view(A.Eₛ, :, 1:size(A.Eₛ,2)-1) .+= view(Eₛ_prev, :, 2:size(Eₛ_prev,2)) + mul!(view(A.Eₛ, :, 1:size(A.Eₛ,2)-1), k̄*c₃', view(Eₛ_prev, :, 2:size(Eₛ_prev,2)), -τ, one(T)) + #A.Eₛ[:,1:length(c₆)-1] += -τ*k̄*(c₆[2:end])' + mul!(view(A.Eₛ, :, 1:length(c₆)-1), k̄, view(c₆, 2:length(c₆))', -τ, one(T)) + #A.Eₛ[:,length(d̄):end] .= 0 + view(A.Eₛ, :, length(d̄):size(A.Eₛ,2)) .= zero(eltype(A)) + + #A.Xₛ[1:length(b),:] = b*(-τ*w̄₁' - τ*f' - τ*c₁' - τ*c₂'*ū₁*w̄₁' - τ*c₄' - τ*c₅'*ū₁*w̄₁') + mul!(view(A.Xₛ, 1:length(b),:), b, w̄₁', -τ, zero(T)) + mul!(view(A.Xₛ, 1:length(b),:), b, f', -τ, one(T)) + mul!(view(A.Xₛ, 1:length(b),:), b, c₁', -τ, one(T)) + mul!(view(A.Xₛ, 1:length(b),:), b, c₂'*ū₁*w̄₁', -τ, one(T)) + mul!(view(A.Xₛ, 1:length(b),:), b, c₄', -τ, one(T)) + mul!(view(A.Xₛ, 1:length(b),:), b, c₅'*ū₁*w̄₁', -τ, one(T)) + #A.Xₛ[1:end-1,:] += Xₛ_prev[2:end,:] + Yₛ_prev[2:end,:]*ū₁*w̄₁' + view(A.Xₛ, 1:size(A.Xₛ,1)-1, :) .+= view(Xₛ_prev, 2:size(Xₛ_prev,1), :) + mul!(view(A.Xₛ, 1:size(A.Xₛ,1)-1, :), view(Yₛ_prev, 2:size(Yₛ_prev,1), :), ū₁*w̄₁', one(T), one(T)) + #A.Xₛ[length(b)+1:end,:] .= zero(eltype(A)) + view(A.Xₛ, length(b)+1:size(A.Xₛ,1), :) .= zero(eltype(A)) + + #A.Yₛ[1:length(b),:] = b*(-τ*k̄' - τ*c₂' - τ*c₅') + mul!(view(A.Yₛ, 1:length(b), :), b, k̄', -τ, zero(T)) + mul!(view(A.Yₛ, 1:length(b), :), b, c₂', -τ, one(T)) + mul!(view(A.Yₛ, 1:length(b), :), b, c₅', -τ, one(T)) + #A.Yₛ[1:end-1,:] += Yₛ_prev[2:end,:] + view(A.Yₛ, 1:size(A.Yₛ,1)-1, :) .+= view(Yₛ_prev, 2:size(Yₛ_prev,1), :) + #A.Yₛ[length(b)+1:end,:] .= 0 + view(A.Yₛ, length(b)+1:size(A.Yₛ,1), :) .= zero(eltype(A)) + + #A.Zₛ[1:length(b), 1:length(d̄)-1] = -τ*b*(d̄[2:end])' + mul!(view(A.Zₛ, 1:length(b), 1:length(d̄)-1), b, view(d̄, 2:length(d̄))', -τ, zero(T)) + #A.Zₛ[1:length(b), 1:length(d₁)-1] += b*(-τ - τ*c₂'*ū₁ - τ*c₅'*ū₁)*(d₁[2:end])' + mul!(view(A.Zₛ, 1:length(b), 1:length(d₁)-1), b, view(d₁, 2:length(d₁))', -τ, one(T)) + mul!(view(A.Zₛ, 1:length(b), 1:length(d₁)-1), b*c₂'*ū₁, view(d₁, 2:length(d₁))', -τ, one(T)) + mul!(view(A.Zₛ, 1:length(b), 1:length(d₁)-1), b*c₅'*ū₁, view(d₁, 2:length(d₁))', -τ, one(T)) + #A.Zₛ[1:length(b), 1:end-1] += -τ*b*c₃'*Eₛ_prev[:,2:end] + mul!(view(A.Zₛ, 1:length(b), 1:size(A.Zₛ,2)-1), b*c₃', view(Eₛ_prev, :, 2:size(Eₛ_prev, 2)), -τ, one(T)) + #A.Zₛ[1:end-1, 1:length(d₁)-1] += Yₛ_prev[2:end,:]*ū₁*(d₁[2:end])' + mul!(view(A.Zₛ, 1:size(A.Zₛ,1)-1, 1:length(d₁)-1), view(Yₛ_prev, 2:size(Yₛ_prev,1), :)*ū₁, view(d₁, 2:length(d₁))', one(T), one(T)) + #A.Zₛ[1:end-1, 1:end-1] += Zₛ_prev[2:end, 2:end] + view(A.Zₛ, 1:size(A.Zₛ,1)-1, 1:size(A.Zₛ,2)-1) .+= view(Zₛ_prev, 2:size(Zₛ_prev,1), 2:size(Zₛ_prev,2)) + #A.Zₛ[1:length(b), 1:length(c₆)-1] += -τ*b*(c₆[2:end])' + mul!(view(A.Zₛ, 1:length(b), 1:length(c₆)-1), b, view(c₆, 2:length(c₆))', -τ, one(T)) + #A.Zₛ[length(b)+1:end,:] .= 0 + view(A.Zₛ, length(b)+1:size(A.Zₛ,1),:) .= zero(eltype(A)) + #A.Zₛ[:,length(d̄):end] .= 0 + view(A.Zₛ, :, length(d̄):size(A.Zₛ,2)) .= zero(eltype(A)) end function update_upper_triangular_part!(A, k̄, b, τ, w̄₁, ū₁, d₁, f, d̄, c₁, c₂, c₃, c₄, c₅, c₆, Q, K, Eₛ, Xₛ, Yₛ, Zₛ, ūw̄_sum, d_extra) j = A.j[] p = size(A.W, 2) - size(A.U, 2) # W already been padded to have size n × (p+r) - β = (-τ*k̄' + ū₁'*K - τ*c₂' - τ*c₅')' + T = eltype(A) + #β = (-τ*k̄' + ū₁'*K - τ*c₂' - τ*c₅')' + β = -τ*k̄ # matrix allocation + mul!(β, K', ū₁, one(T), one(T)) + mul!(β, I, c₂, -τ, one(T)) + mul!(β, I, c₅, -τ, one(T)) if size(Yₛ, 1) > 0 β .+= view(Yₛ, 1, :) end - α = (w̄₁' - τ*w̄₁' - τ*f' + ū₁'*Q - τ*c₁' - τ*c₄' + τ*k̄'*ū₁*w̄₁')' + #α = (w̄₁' - τ*w̄₁' - τ*f' + ū₁'*Q - τ*c₁' - τ*c₄' + τ*k̄'*ū₁*w̄₁')' + α = (1-τ)*w̄₁ # matrix allocation + mul!(α, I, f, -τ, one(T)) + mul!(α, Q', ū₁, one(T), one(T)) + mul!(α, I, c₁, -τ, one(T)) + mul!(α, I, c₄, -τ, one(T)) + mul!(α, w̄₁*ū₁', k̄, τ, one(T)) if size(Xₛ, 1) > 0 α .+= view(Xₛ, 1, :) end - α .+= (-β' * view(ūw̄_sum, j+1, :, :))' - - d = -τ * d̄[2:end] - d[1:length(d₁)-1] .+= (1 - τ + τ*k̄'*ū₁) * view(d₁, 2:length(d₁)) - d[1:min(length(d),size(Eₛ,2)-1)] .+= ((ū₁' - τ*c₃') * view(Eₛ, :, 2:min(length(d)+1,size(Eₛ,2))))' - d[1:length(c₆)-1] .+= -τ * view(c₆, 2:length(c₆)) + #α += (-β'*ūw̄_sum[j+1,:,:])' + mul!(α, view(ūw̄_sum, j+1, :, :)', β, -one(T), one(T)) + + d = -τ * d̄[2:end] # matrix allocation + #d[1:length(d₁)-1] += (1 - τ + τ*k̄'*ū₁)*d₁[2:end] + mul!(view(d, 1:length(d₁)-1), I, view(d₁, 2:length(d₁)), 1 - τ + τ*k̄'*ū₁, one(T)) + #d[1:min(length(d),size(Eₛ,2)-1)] += ((ū₁' - τ*c₃')*Eₛ[:,2:min(length(d)+1,size(Eₛ,2))])' + mul!(view(d, 1:min(length(d),size(Eₛ,2)-1)), view(Eₛ, :, 2:min(length(d)+1,size(Eₛ,2)))', ū₁-τ*c₃, one(T), one(T)) + #d[1:length(c₆)-1] += -τ*c₆[2:end] + mul!(view(d, 1:length(c₆)-1), I, view(c₆, 2:length(c₆)), -τ, one(T)) if size(Zₛ, 1) > 0 + #d[1:min(length(d),size(Zₛ,2)-1)] += Zₛ[1, 2:min(length(d)+1,size(Zₛ,2))] d[1:min(length(d),size(Zₛ,2)-1)] .+= view(Zₛ, 1, 2:min(length(d)+1,size(Zₛ,2))) end d_extra_current = d_extra[j+1] - d[1:size(d_extra_current,2)] .+= (-β'*d_extra_current)' + #d[1:size(d_extra_current,2)] += (-β'*d_extra_current)' + mul!(view(d, 1:size(d_extra_current,2)), d_extra_current', β, -one(T), one(T)) j = A.j[] - A.W[j+1, 1:p] = α - A.W[j+1, p+1:end] = β - A.B[j+1, j+2:j+1+length(d)] = d + #A.W[j+1, 1:p] = α + view(A.W, j+1, 1:p) .= α + #A.W[j+1, p+1:end] = β + view(A.W, j+1, (p+1):size(A.W,2)) .= β + #A.B[j+1, j+2:j+1+length(d)] = d + view(A.B, j+1, j+2:j+1+length(d)) .= d end function update_lower_triangular_part!(A, pivot, k̄, b) j = A.j[] A.B[j+1, j+1] = pivot - A.B[j+2:j+1+length(b), j+1] = b - A.V[j+1, :] = k̄ + #A.B[j+2:j+1+length(b), j+1] = b + view(A.B, j+2:j+1+length(b), j+1) .= b + #A.V[j+1, :] = k̄ + view(A.V, j+1, :) .= k̄ end function UᵀU_lookup_table(A) n, r = size(A.U) - UᵀU = zeros(eltype(A), n, r, r) - UᵀU_current = zeros(eltype(A), r, r) + T = eltype(A) + UᵀU = zeros(T, n, r, r) + UᵀU_current = zeros(T, r, r) for i in n:-1:1 - UᵀU_current .+= view(A.U, i, :) * (view(A.U, i, :))' - UᵀU[i,:,:] .= UᵀU_current + #UᵀU_current += A.U[i,:] * (A.U[i,:])' + mul!(UᵀU_current, view(A.U, i, :), (view(A.U, i, :))', one(T), one(T)) + #UᵀU[i,:,:] .= UᵀU_current + view(UᵀU, i, :, :) .= UᵀU_current end UᵀU end @@ -325,11 +428,14 @@ end function ūw̄_sum_lookup_table(A) n, r = size(A.U) p = size(A.W, 2) - size(A.U, 2) # W already been padded to have size n × (p+r) - ūw̄_sum = zeros(eltype(A), n, r, p) - ūw̄_sum_current = zeros(eltype(A), r, p) + T = eltype(A) + ūw̄_sum = zeros(T, n, r, p) + ūw̄_sum_current = zeros(T, r, p) for t in 1:n - ūw̄_sum[t,:,:] .= ūw̄_sum_current - ūw̄_sum_current .+= view(A.U, t, :) * (view(A.W, t, 1:p))' + #ūw̄_sum[t,:,:] .= ūw̄_sum_current + view(ūw̄_sum, t, :, :) .= ūw̄_sum_current + #ūw̄_sum_current[:,:] .+= A.U[t,:] * (A.W[t,1:A.p])' + mul!(ūw̄_sum_current, view(A.U, t, :), (view(A.W, t, 1:p))', one(T), one(T)) end ūw̄_sum end @@ -338,11 +444,13 @@ function d_extra_lookup_table(A) n, r = size(A.U) l, m = bandwidths(A.B) m = m - l # B already been padded to have upper bandwidth l+m - d_extra = Vector{Matrix{eltype(A)}}() + T = eltype(A) + d_extra = Vector{Matrix{T}}() for i in 1:n - d_extra_current = zeros(eltype(A), r, min(m, n-i)) + d_extra_current = zeros(eltype(A), r, min(m, n-i)) # matrix allocation for t in max(1,i+1-m) : i-1 - d_extra_current .+= view(A.U, t, :) * (view(A.B, t, i+1:i+size(d_extra_current,2)))' + #d_extra_current .+= A.U[t,:] * (A.B[t, i+1:i+size(d_extra_current,2)])' + mul!(d_extra_current, view(A.U, t, :), (view(A.B, t, i+1:i+size(d_extra_current,2)))', one(T), one(T)) end push!(d_extra, d_extra_current) end @@ -355,13 +463,19 @@ function fast_UᵀA(U, V, W, S, B, j) n, r = size(U) p = size(W,2) l, m = bandwidths(B) - UᵀA = zeros(eltype(U), r, n+1-j) + T = eltype(U) + UᵀA = zeros(T, r, n+1-j) UᵀU = (view(U, j:n, :))' * view(U, j:n, :) - UᵀW = zeros(eltype(U), r, p) + UᵀW = zeros(T, r, p) for i in j:n - UᵀU .-= view(U, i, :) * (view(U, i, :))' - UᵀA[:,i+1-j] = UᵀU*view(V, i, :) + (view(U, max(j,i-m) : min(i+l,n), :))'*view(B, max(j,i-m) : min(i+l,n), i) + UᵀW*view(S, i, :) - UᵀW .+= view(U, i, :) * (view(W, i, :))' + #UᵀU -= U[i,:] * (U[i,:])' + mul!(UᵀU, view(U, i, :), (view(U, i, :))', -one(T), one(T)) + #UᵀA[:,i+1-j] = UᵀU*V[i,:] + (U[max(j,i-m) : min(i+l,n),:])'*B[max(j,i-m) : min(i+l,n),i] + UᵀW*S[i,:] + mul!(view(UᵀA, :, i+1-j), UᵀU, view(V, i, :), one(T), zero(T)) + mul!(view(UᵀA, :, i+1-j), (view(U, max(j,i-m) : min(i+l,n), :))', view(B, max(j,i-m) : min(i+l,n), i), one(T), one(T)) + mul!(view(UᵀA, :, i+1-j), UᵀW, view(S, i, :), one(T), one(T)) + #UᵀW += U[i,:] * (W[i,:])' + mul!(UᵀW, view(U, i, :), (view(W, i, :))', one(T), one(T)) end UᵀA end @@ -369,7 +483,7 @@ end ### -# Support ldiv! +# Support lmul! ### function getproperty(F::QR{<:Any,<:BandedPlusSemiseparableMatrix}, d::Symbol) @@ -383,9 +497,60 @@ function getproperty(F::QR{<:Any,<:BandedPlusSemiseparableMatrix}, d::Symbol) end end -function lmul!(adjQ::AdjointQ{<:Any,<:QRPackedQ{<:Any,<:BandedPlusSemiseparableMatrix}}, B::StridedVector) +function lmul!(adjQ::AdjointQ{<:Any,<:QRPackedQ{<:Any,<:BandedPlusSemiseparableMatrix}}, b::StridedVector) Q = parent(adjQ) - factors = Q.factors + F = Q.factors τ = Q.τ - error("implement") + n, r = size(F.U) + T = eltype(F.U) + l, m = bandwidths(F.B) + O = zeros(T, n, r) + h = zeros(T, r) + G = zeros(T, n, l+1) + Uᵀb = Uᵀb_lookup_table(F, b) + UᵀU = UᵀU_lookup_table(F) + for j = 1 : n - 1 + # c is a scalar: + #c = F.V[j,:]' * Uᵀb[j+1,:] + b[j] + F.B[j+1:min(j+l,n), j]' * b[j+1:min(j+l,n)] + F.V[j,:]'*UᵀU[j+1,:,:]*h + + # (F.U[j,:]' + F.B[j+1:min(j+l,n), j]'*F.U[j+1:min(j+l,n),:])*h + sum(F.V[j,:]'*F.U[j+1:min(j+l,n),:]'*G[j+1:min(j+l,n),:]) + + # sum(G[j,:]' + F.B[j+1:min(j+l,n), j]'*G[j+1:min(j+l,n),:]) + c = view(F.V, j, :)'*view(Uᵀb, j+1, :) + b[j] + view(F.B, j+1:min(j+l,n), j)'*view(b, j+1:min(j+l,n)) + + view(F.V, j,:)'*view(UᵀU, j+1,:,:)*h + view(F.U, j,:)'*h + view(F.B, j+1:min(j+l,n),j)'*view(F.U, j+1:min(j+l,n),:)*h + + sum(view(F.V,j,:)'*view(F.U,j+1:min(j+l,n),:)'*view(G,j+1:min(j+l,n),:)) + + sum(view(G,j,:)'+view(F.B,j+1:min(j+l,n), j)'*view(G,j+1:min(j+l,n),:)) + + O[j, :] = F.U[j, :] .* h + #h = h - τ[j] * c * F.V[j, :] + mul!(h, I, view(F.V, j, :), -τ[j]*c, one(T)) + for t = 1 : min(l+1, n-j+1) + if t == 1 + G[j+t-1, t] = -τ[j] * c * 1.0 + else + G[j+t-1, t] = -τ[j] * c * F.B[j+t-1, j] + end + end + end + #b[1:end] += sum(O, dims=2) + sum(G, dims=2) + for i = 1 : r + mul!(view(b,:), I, view(O, :, i), one(T), one(T)) + end + for i = 1 : l+1 + mul!(view(b,:), I, view(G, :, i), one(T), one(T)) + end + #b[n] += F.U[n,:]'*h + b[n] += view(F.U, n, :)' * h +end + +function Uᵀb_lookup_table(F, b) + n, r = size(F.U) + T = eltype(F) + Uᵀb = zeros(T, n, r) + Uᵀb_current = zeros(T, r) + for i in n:-1:1 + #Uᵀb_current .+= F.U[i,:] * b[i] + mul!(Uᵀb_current, I, view(F.U, i, :), b[i], one(T)) + #Uᵀb[i,:] .= Uᵀb_current + view(Uᵀb, i, :) .= Uᵀb_current + end + Uᵀb end \ No newline at end of file diff --git a/test/test_qr.jl b/test/test_qr.jl index ee50a4a..74e94fc 100644 --- a/test/test_qr.jl +++ b/test/test_qr.jl @@ -1,6 +1,6 @@ using BandedMatrices, LinearAlgebra, Random, SemiseparableMatrices, Test using SemiseparableMatrices: BandedPlusSemiseparableQRPerturbedFactors -#Random.seed!(1234) +Random.seed!(1234) @testset "QR" begin n = 20 @@ -34,7 +34,19 @@ using SemiseparableMatrices: BandedPlusSemiseparableQRPerturbedFactors @test R ≡ UpperTriangular(fact.factors) - lmul!(Q',b) - ldiv!(R, b) + res = lmul!(Q',b) + println(res) + F = fact.factors + τ = fact.τ + for i = 1 : n-1 + y = zeros(n) + y[i] = 1 + y[i+1:n] = F[i+1:n, i] + b = (I-τ[i]*y*y')*b + end + @test b ≈ res + #x = R \ b + #ldiv!(R, b) + #@test x ≈ b end end \ No newline at end of file From dd1ca1f2072ae781b5b8df7dd31025141a4ce62c Mon Sep 17 00:00:00 2001 From: TaoChenImperial Date: Wed, 3 Dec 2025 12:42:17 +0000 Subject: [PATCH 19/19] update test_qr.jl --- test/test_qr.jl | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/test/test_qr.jl b/test/test_qr.jl index 74e94fc..39011be 100644 --- a/test/test_qr.jl +++ b/test/test_qr.jl @@ -1,6 +1,6 @@ using BandedMatrices, LinearAlgebra, Random, SemiseparableMatrices, Test using SemiseparableMatrices: BandedPlusSemiseparableQRPerturbedFactors -Random.seed!(1234) +#Random.seed!(1234) @testset "QR" begin n = 20 @@ -11,7 +11,7 @@ Random.seed!(1234) U,V = randn(n,r), randn(n,r) W,S = randn(n,p), randn(n,p) A = BandedPlusSemiseparableQRPerturbedFactors(B, (U,V), (W,S)) - @test @inferred(size(A)) == (20,20) + @test @inferred(size(A)) == (n,n) fact_true = LinearAlgebra.qrfactUnblocked!(Matrix(A)) fact = @inferred(qr!(A)) @test A ≈ fact_true.factors ≈ fact.factors @@ -34,19 +34,22 @@ Random.seed!(1234) @test R ≡ UpperTriangular(fact.factors) - res = lmul!(Q',b) - println(res) - F = fact.factors - τ = fact.τ + # test lmul! + F = fact_true.factors + τ = fact_true.τ + b_true = copy(b) for i = 1 : n-1 y = zeros(n) y[i] = 1 y[i+1:n] = F[i+1:n, i] - b = (I-τ[i]*y*y')*b + b_true = (I-τ[i]*y*y')*b_true end - @test b ≈ res - #x = R \ b - #ldiv!(R, b) - #@test x ≈ b + lmul!(Q',b) + @test b ≈ b_true + + #test ldiv! + x = triu(F) \ b + ldiv!(R, b) + @test b ≈ x end end \ No newline at end of file