Merge pull request #403 from danielwe/opnorm

maleadt · web-flow · commit fc0d327ecc2f · 2022-05-19T11:35:11.000+02:00
Implement opnorm
diff --git a/src/host/linalg.jl b/src/host/linalg.jl
@@ -296,45 +296,63 @@ end
 ## norm
 
 function LinearAlgebra.norm(v::AbstractGPUArray{T}, p::Real=2) where {T}
-    result_type = typeof(float(norm(zero(T))))
+    result_type, sum_type, promote_ = _normtypes(T)
     isempty(v) && return zero(result_type)
     p == 0 && return convert(result_type, count(!iszero, v))
-    # Accumulate in at least Float32, like nrm2 in CUBLAS
-    acc_type = promote_type(Float32, result_type)
-    spp = convert(acc_type, p)
-    init = zero(acc_type)  # To set the accumulation type in `sum`
-    # If acc_type is wider than T, widen before applying other functions. To work in GPU
-    # kernels this operation must close around a value, not a type, hence the prototype
-    prototype = zero(promote_type(T, acc_type))
-    widen(x) = convert(typeof(prototype), x)
+    spp = convert(sum_type, p)
+    init = zero(sum_type)  # To set the accumulation type in `sum`
     # Rescaling heuristic similar to Base, see LinearAlgebra/src/generic.jl
     result = if p > 1 || p < -1  # May need rescaling
         infnorm = p > 1 ? maximum(norm, v) : minimum(norm, v)
         if isinf(p) || iszero(infnorm) || isinf(infnorm)
             return convert(result_type, infnorm)  # Return early to skip conversions
         end
-        factor = convert(acc_type, infnorm)
+        factor = convert(sum_type, infnorm)
         if p == 2
             if isfinite(length(v) * factor^2) && !iszero(factor^2)  # No rescaling
-                sqrt(sum(x -> LinearAlgebra.norm_sqr(widen(x)), v; init=init))
+                sqrt(sum(x -> LinearAlgebra.norm_sqr(promote_(x)), v; init=init))
             else  # Rescaling
-                factor * sqrt(sum(x -> (norm(widen(x)) / factor)^2, v; init=init))
+                factor * sqrt(sum(x -> (norm(promote_(x)) / factor)^2, v; init=init))
             end
         else
             if isfinite(length(v) * factor^spp) && !iszero(factor^spp)  # No rescaling
-                sum(x -> norm(widen(x))^spp, v; init=init)^inv(spp)
+                sum(x -> norm(promote_(x))^spp, v; init=init)^inv(spp)
             else  # Rescaling
-                factor * (sum(x -> (norm(widen(x)) / factor)^spp, v; init=init)^inv(spp))
+                factor * (sum(x -> (norm(promote_(x)) / factor)^spp, v; init=init)^inv(spp))
             end
         end
     elseif p == 1
-        sum(x -> norm(widen(x)), v; init=init)
+        sum(x -> norm(promote_(x)), v; init=init)
     else
-        sum(x -> norm(widen(x))^spp, v; init=init)^inv(spp)
+        sum(x -> norm(promote_(x))^spp, v; init=init)^inv(spp)
     end
     return convert(result_type, result)
 end
 
+function _normtypes(::Type{T}) where {T}
+    result_type = typeof(float(norm(zero(T))))
+    # Accumulate in at least Float32, like nrm2 in CUBLAS
+    sum_type = promote_type(Float32, result_type)
+    # If sum_type is wider than T, promote before applying other functions. To work in GPU
+    # kernels this operation must close around a value, not a type, hence the prototype
+    prototype = zero(promote_type(T, sum_type))
+    promote_(x) = convert(typeof(prototype), x)
+    return result_type, sum_type, promote_
+end
+
+## opnorm
+
+function LinearAlgebra.opnorm1(A::AnyGPUArray{T,2}) where {T}
+    result_type, sum_type, promote_ = _normtypes(T)
+    result = maximum(sum(x -> norm(promote_(x)), A; dims=1); init=zero(sum_type))
+    return convert(result_type, result)
+end
+
+function LinearAlgebra.opnormInf(A::AnyGPUArray{T,2}) where {T}
+    result_type, sum_type, promote_ = _normtypes(T)
+    result = maximum(sum(x -> norm(promote_(x)), A; dims=2); init=zero(sum_type))
+    return convert(result_type, result)
+end
 
 ## symmetric
 
diff --git a/test/testsuite/linalg.jl b/test/testsuite/linalg.jl
@@ -255,4 +255,19 @@ end
             @test compare(norm, AT, arr, Ref(p))
         end
     end
+    @testset "$p-opnorm($sz x $T)" for sz in [(2, 0), (2, 3)],
+                                     p in Any[1, Inf],
+                                     T in eltypes
+        if T == Int8
+            continue
+        end
+        if !in(float(real(T)), eltypes)
+            # norm promotes to float, so make sure that type is supported
+            continue
+        end
+        range = real(T) <: Integer ? (T.(1:10)) : T # prevent integer overflow
+        mat = rand(range, sz)
+        @test compare(opnorm, AT, mat, Ref(p))
+        @test isrealfloattype(typeof(opnorm(AT(mat), p)))
+    end
 end