Skip to content

Commit 12e6f0a

Browse files
author
Github action runner
committed
Fix fill! memory
1 parent 4a41930 commit 12e6f0a

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

src/array.jl

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -505,8 +505,17 @@ fill(v, dims...) = fill!(oneArray{typeof(v)}(undef, dims...), v)
505505
fill(v, dims::Dims) = fill!(oneArray{typeof(v)}(undef, dims...), v)
506506

507507
"""
    fill!(A::oneDenseArray{T}, val) where T

Set every element of `A` to `convert(T, val)` and return `A`.

`A` is returned unchanged when it has no elements, or when `T` is a zero-size
type (in which case `val` is still converted, so a failing conversion is
reported). Otherwise the call blocks until the queue obtained from
`global_queue(context(A), device())` has been synchronized.
"""
function Base.fill!(A::oneDenseArray{T}, val) where T
    length(A) == 0 && return A
    # Use a fresh local instead of rebinding `val`, so the argument keeps a single type.
    pattern = convert(T, val)
    sizeof(T) == 0 && return A

    ctx = context(A)
    dev = device()

    # execute! is async, so we need to allocate the pattern in USM memory
    # and keep it alive until the operation completes.
    buf = oneL0.host_alloc(ctx, sizeof(T), Base.datatype_alignment(T))
    try
        unsafe_store!(convert(Ptr{T}, buf), pattern)
        unsafe_fill!(ctx, dev, pointer(A), convert(ZePtr{T}, buf), length(A))
        synchronize(global_queue(ctx, dev))
    finally
        # Release the staging buffer on every exit path; without this, an exception
        # thrown by the fill or the synchronization would leak the allocation.
        oneL0.free(buf)
    end
    return A
end
512521

test/level-zero.jl

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,13 +271,23 @@ let src = rand(Int, 1024)
271271
synchronize(queue)
272272
@test chk == src
273273

274+
# FIX: Allocate pattern in USM Host Memory
275+
# Standard host memory (stack/heap) is not accessible to discrete GPUs as a fill-pattern source.
276+
# We must use USM Host Memory.
277+
pattern_val = 42
278+
pattern_buf = oneL0.host_alloc(ctx, sizeof(Int), Base.datatype_alignment(Int))
279+
unsafe_store!(convert(Ptr{Int}, pattern_buf), pattern_val)
280+
274281
execute!(queue) do list
275-
pattern = [42]
276-
append_fill!(list, pointer(dst), pointer(pattern), sizeof(pattern), sizeof(src))
282+
# Use the USM pointer (converted to ZePtr)
283+
append_fill!(list, pointer(dst), convert(ZePtr{Int}, pattern_buf), sizeof(Int), sizeof(src))
277284
append_barrier!(list)
278285
append_copy!(list, pointer(chk), pointer(dst), sizeof(src))
279286
end
280287
synchronize(queue)
288+
289+
oneL0.free(pattern_buf)
290+
281291
@test all(isequal(42), chk)
282292

283293
free(dst)

0 commit comments

Comments
 (0)