@@ -134,17 +134,15 @@ Base.collect(X::AnyGPUArray) = collect_to_cpu(X)
134134
135135# memory copying
136136
137+ # expects the GPU array type to have linear `copyto!` methods (i.e. accepting an integer
138+ # offset and length) from and to CPU arrays and between GPU arrays.
139+
# `copy!` between two GPU vectors: both arguments must have identical axes, otherwise an
# `ArgumentError` is thrown. The actual transfer is delegated to `copyto!(dst, src)`, and
# whatever that call returns is returned to the caller.
function Base.copy!(dst::AbstractGPUVector, src::AbstractGPUVector)
    if axes(dst) != axes(src)
        msg = "arrays must have the same axes for `copy!`. consider using `copyto!` instead"
        throw(ArgumentError(msg))
    end
    return copyto!(dst, src)
end
142145
143- # # basic linear copies of identically-typed memory
144-
145- # expects the GPU array type to have linear `copyto!` methods (i.e. accepting an integer
146- # offset and length) from and to CPU arrays and between GPU arrays.
147-
148146for (D, S) in ((AnyGPUArray, Array),
149147 (Array, AnyGPUArray),
150148 (AnyGPUArray, AnyGPUArray))
@@ -156,18 +154,6 @@ for (D, S) in ((AnyGPUArray, Array),
156154 copyto! (dest, drange, src, srange)
157155 end
158156
159- function Base. copyto! (dest:: $D , d_range:: CartesianIndices{1} ,
160- src:: $S , s_range:: CartesianIndices{1} )
161- len = length (d_range)
162- if length (s_range) != len
163- throw (ArgumentError (" Copy range needs same length. Found: dest: $len , src: $(length (s_range)) " ))
164- end
165- len == 0 && return dest
166- d_offset = first (d_range)[1 ]
167- s_offset = first (s_range)[1 ]
168- copyto! (dest, d_offset, src, s_offset, len)
169- end
170-
# whole-array copy: forward to the linear (offset + length) method, starting at index 1
# on both sides and transferring `length(src)` elements.
function Base.copyto!(dest::$D, src::$S)
    return copyto!(dest, 1, src, 1, length(src))
end
172158 end
173159end
@@ -253,6 +239,13 @@ function Base.copyto!(dest::AnyGPUArray{<:Any, N}, destcrange::CartesianIndices{
253239 len = length (destcrange)
254240 len == 0 && return dest
255241
242+ # linear copy if we can
243+ if N == 1
244+ d_offset = first (destcrange)[1 ]
245+ s_offset = first (srccrange)[1 ]
246+ return copyto! (dest, d_offset, src, s_offset, len)
247+ end
248+
256249 dest_offsets = first (destcrange) - oneunit (CartesianIndex{N})
257250 src_offsets = first (srccrange) - oneunit (CartesianIndex{N})
258251 kernel = cartesian_copy_kernel! (get_backend (dest))
@@ -267,6 +260,15 @@ for (dstTyp, srcTyp) in (AbstractGPUArray=>Array, Array=>AbstractGPUArray)
267260 if size (dstrange) != size (srcrange)
268261 throw (ArgumentError (" source and destination must have same size (got $(size (srcrange)) and $(size (dstrange)) )" ))
269262 end
263+ len = length (dstrange)
264+ len == 0 && return dest
265+
266+ # linear copy if we can
267+ if N == 1
268+ d_offset = first (dstrange)[1 ]
269+ s_offset = first (srcrange)[1 ]
270+ return copyto! (dst, d_offset, src, s_offset, len)
271+ end
270272
271273 # figure out how many dimensions of the Cartesian ranges map onto contiguous memory
272274 # in both source and destination. we will copy these one by one as linear ranges.
0 commit comments