Skip to content

Commit 460bc15

Browse files
committed
improved performance of racm.nextJobsRestricted UDF. using table variable iso common table expressions in single table. somehow MSSQL started performing very bad, even if only ~200 queries were pending.
1 parent 1c54da0 commit 460bc15

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
CREATE or ALTER function [racm].[nextJobsRestricted](@compmUUID varchar(64), @timeout real,
2+
@interval real, @maxNum integer,@maxPerUser integer)
3+
returns @rt table(
4+
jobId bigint,submitterId bigint,
5+
computeDomainId bigint, submitTime datetime,
6+
usageWeight float,numQueued integer, numStarted integer, ranking integer)
7+
as
8+
begin
9+
10+
declare @computeDomainId bigint, @currentDate datetime = getDate()
11+
12+
select @computeDomainId = computeDomainId
13+
from COMPM
14+
where uuid=@compmUUID
15+
;
16+
17+
declare @prev table (submitterId bigint, numJobs integer, numQueued integer
18+
, numStarted integer, totTime float, usageWeight float)
19+
20+
insert into @prev
21+
select dj.submitterId
22+
, count(*) as numJobs
23+
, sum(case when dj.status between 2 and 4 then 1 else 0 end) as numQueued
24+
, sum(case when dj.status = 8 then 1 else 0 end) as numStarted
25+
, sum(case when dj.finishedTime is null then @timeout -- should really be cast(datediff(second,startedTime, getDate()) as float)
26+
else cast(datediff(second,dj.startedTime, dj.finishedtime) as float)
27+
end ) as totTime
28+
, sum((case when dj.finishedTime is null then @timeout -- should really be cast(datediff(second,startedTime, getDate()) as float)
29+
else cast(datediff_big(millisecond,dj.startedTime, dj.finishedtime) as float)
30+
end )/datediff_big(millisecond,dj.startedTime,@currentDate)) as usageWeight
31+
from compm c , job dj
32+
where c.uuid = @compmUUID
33+
and dj.computeDomainId=c.computeDomainId
34+
and dj.status > 1 -- PENDING
35+
and dj.startedTime >= dateadd(second,-@interval,getDate())
36+
group by submitterid
37+
38+
declare @pending table (id bigint, submitterId bigint, computeDomainId bigint
39+
, submitTime datetime,rankSubmitted integer)
40+
41+
insert into @pending
42+
select dj.id, dj.submitterId, dj.computeDomainId, submitTime
43+
, rank() over (partition by submitterId order by submitTime) as rankSubmitted
44+
from compm c , job dj
45+
where c.uuid = @compmUUID
46+
and dj.computeDomainId=c.computeDomainId
47+
and dj.status=1 -- PENDING
48+
49+
;
50+
with final as (
51+
select j.Id as jobId,j.submitterId, j.computeDomainId, j.submitTime -- without TOP cannot do an order by in a function
52+
, isnull(p.usageWeight,0) as usageWeight
53+
, isnull(p.numQueued,0) as numQueued
54+
, isnull(p.numStarted,0) as numStarted
55+
, rank() over (order by rankSubmitted,usageWeight,submitTime) as ranking
56+
from @pending j left outer join @prev p
57+
on p.submitterId = j.submitterid
58+
where j.rankSubmitted <=@maxNum
59+
and j.rankSubmitted+isnull(p.numQueued,0)+isnull(p.numStarted,0)<=@maxPerUser
60+
)
61+
insert into @rt
62+
select * -- jobId,submitterId, computeDomainId, submitTime , usageWeight, numQueued, numStarted, ranking
63+
from final
64+
where ranking <= @maxNum
65+
-- and rankSubmitted+numQueued+numStarted<=@maxPerUser -- to counter possibility COMPM gets single user too many times in internal queue
66+
67+
return
68+
end
69+
70+
71+

0 commit comments

Comments
 (0)