1414
1515module D = Debug. Make (struct let name = " topology" end )
1616
17- open D
18-
1917module CPUSet = struct
20- include Set. Make (struct
21- type t = int
22-
23- let compare (x : int ) (y : int ) = compare x y
24- end )
18+ include Set. Make (Int )
2519
2620 let pp_dump = Fmt. using to_seq Fmt. (Dump. seq int )
2721
@@ -95,27 +89,32 @@ let seq_range a b =
9589 let rec next i () = if i = b then Seq. Nil else Seq. Cons (i, next (i + 1 )) in
9690 next a
9791
98- (* * [gen_2n n] Generates all non-empty subsets of the set of [n] nodes. *)
99- let seq_gen_2n n =
92+ let seq_filteri p s =
93+ let rec loop i s () =
94+ match s () with
95+ | Seq. Nil ->
96+ Seq. Nil
97+ | Cons (hd , s ) ->
98+ if p i hd then
99+ Cons (hd, loop (i + 1 ) s)
100+ else
101+ loop (i + 1 ) s ()
102+ in
103+ loop 0 s
104+
105+ (* * [seq_all_subsets n] Generates all non-empty subsets of the [nodes] set. *)
106+ let seq_all_subsets nodes =
107+ let n = Seq. length nodes in
100108 (* A node can either be present in the output or not, so use a loop [1, 2^n)
101109 and have the [i]th bit determine that. *)
102- let of_mask i =
103- seq_range 0 n |> Seq. filter (fun bit -> (i lsr bit) land 1 = 1 )
104- in
110+ let of_mask i = nodes |> seq_filteri (fun bit _ -> (i lsr bit) land 1 = 1 ) in
105111 seq_range 1 (1 lsl n) |> Seq. map of_mask
106112
107113(* * [seq_sort ~cmp s] sorts [s] in a temporary place using [cmp]. *)
108114let seq_sort ~cmp s =
109115 let a = Array. of_seq s in
110116 Array. fast_sort cmp a ; Array. to_seq a
111117
112- (* * [seq_append a b] is the sequence [a] followed by [b] *)
113- let seq_append (a : 'a Seq.t ) (b : 'a Seq.t ) =
114- let rec next v () =
115- match v () with Seq. Nil -> b () | Seq. Cons (x , xs ) -> Seq. Cons (x, next xs)
116- in
117- next a
118-
119118module NUMA = struct
120119 type node = Node of int
121120
@@ -125,31 +124,43 @@ module NUMA = struct
125124 let compare (Node a ) (Node b ) = compare a b
126125 end )
127126
127+ (* -1 in 32 bits *)
128+ let unreachable_distance = 0xFFFFFFFF
129+
130+ let self_distance = 10
131+
128132 (* no mutation is exposed in the interface, therefore this is immutable *)
129133 type t = {
130134 distances : int array array
131135 ; cpu_to_node : node array
132136 ; node_cpus : CPUSet .t array
133137 ; all : CPUSet .t
134138 ; node_usage : int array
139+ (* * Usage across nodes is meant to be balanced when choosing candidates for a VM *)
135140 ; candidates : (float * node Seq .t ) Seq .t
141+ (* * Sequence of all subsets of nodes and the average distance within
142+ the subset, sorted by the latter in increasing order. *)
136143 }
137144
138145 let node_of_int i = Node i
139146
140147 let node_distances d nodes =
141- let dists =
142- nodes |> Seq. flat_map (fun n1 -> nodes |> Seq. map (fun n2 -> d.(n1).(n2)))
143- in
144- let count, max_dist, sum_dist =
145- Seq. fold_left
146- (fun (count , maxv , sum ) e -> (count + 1 , max maxv e, sum + e))
147- (0 , min_int, 0 ) dists
148- in
149- (* We want to minimize maximum distance first, and average distance next.
150- When running the VM we don't know which pCPU it'll end up using, and want
151- to limit the worst case performance. *)
152- ((max_dist, float sum_dist /. float count), nodes)
148+ if Seq. is_empty nodes then
149+ None
150+ else
151+ let dists =
152+ nodes
153+ |> Seq. flat_map (fun n1 -> nodes |> Seq. map (fun n2 -> d.(n1).(n2)))
154+ in
155+ let count, max_dist, sum_dist =
156+ Seq. fold_left
157+ (fun (count , maxv , sum ) e -> (count + 1 , max maxv e, sum + e))
158+ (0 , min_int, 0 ) dists
159+ in
160+ (* We want to minimize maximum distance first, and average distance next.
161+ When running the VM we don't know which pCPU it'll end up using, and want
162+ to limit the worst case performance. *)
163+ Some ((max_dist, float sum_dist /. float count), nodes)
153164
154165 let dist_cmp (a1 , _ ) (b1 , _ ) = compare a1 b1
155166
@@ -159,60 +170,96 @@ module NUMA = struct
159170 [n*multiply ... n*multiply + multiply-1], except we always the add the
160171 single NUMA node combinations. *)
161172 (* make sure that single NUMA nodes are always present in the combinations *)
162- let single_nodes =
173+ let distance_to_candidate d = (d, float d) in
174+ let valid_nodes =
163175 seq_range 0 (Array. length d)
164- |> Seq. map (fun i -> ((10 , 10.0 ), Seq. return i))
176+ |> Seq. filter_map (fun i ->
177+ let self = d.(i).(i) in
178+ if self <> unreachable_distance then
179+ Some i
180+ else
181+ None
182+ )
165183 in
166- let numa_nodes = Array . length d in
184+ let numa_nodes = Seq . length valid_nodes in
167185 let nodes =
168- if numa_nodes > 16 then
169- (* try just the single nodes, and give up (use all nodes otherwise) to
170- avoid exponential running time. We could do better here, e.g. by
186+ if numa_nodes > 16 then (
187+ (* Avoid generating too many candidates because of the exponential
188+ running time. We could do better here, e.g. by
171189 reducing the matrix *)
172- single_nodes
173- else
174- numa_nodes
175- |> seq_gen_2n
176- |> Seq. map (node_distances d)
177- |> seq_append single_nodes
190+ D. info
191+ " %s: More than 16 valid NUMA nodes detected, considering only \
192+ individual nodes."
193+ __FUNCTION__ ;
194+ valid_nodes
195+ |> Seq. map (fun i ->
196+ let self = d.(i).(i) in
197+ (distance_to_candidate self, Seq. return i)
198+ )
199+ ) else
200+ valid_nodes |> seq_all_subsets |> Seq. filter_map (node_distances d)
178201 in
179202 nodes
180203 |> seq_sort ~cmp: dist_cmp
181204 |> Seq. map (fun ((_ , avg ), nodes ) -> (avg, Seq. map (fun n -> Node n) nodes))
182205
183- let pp_dump_distances = Fmt. (int |> Dump. array |> Dump. array )
184-
185206 let make ~distances ~cpu_to_node =
186- debug " Distances: %s" (Fmt. to_to_string pp_dump_distances distances) ;
187- debug " CPU2Node: %s" (Fmt. to_to_string Fmt. (Dump. array int ) cpu_to_node) ;
207+ let ( let * ) = Option. bind in
188208 let node_cpus = Array. map (fun _ -> CPUSet. empty) distances in
209+
210+ (* nothing can be scheduled on unreachable nodes, remove them from the
211+ node_cpus *)
189212 Array. iteri
190- (fun i node -> node_cpus.(node) < - CPUSet. add i node_cpus.(node))
191- cpu_to_node ;
192- Array. iteri
193- (fun i row ->
194- let d = distances.(i).(i) in
195- if d <> 10 then
196- invalid_arg
197- (Printf. sprintf " NUMA distance from node to itself must be 10: %d" d) ;
198- Array. iteri
199- (fun _ d ->
200- if d < 10 then
201- invalid_arg (Printf. sprintf " NUMA distance must be >= 10: %d" d)
202- )
203- row
213+ (fun i node ->
214+ let self = distances.(node).(node) in
215+ if self <> unreachable_distance then
216+ node_cpus.(node) < - CPUSet. add i node_cpus.(node)
204217 )
205- distances ;
206- let all = Array. fold_left CPUSet. union CPUSet. empty node_cpus in
207- let candidates = gen_candidates distances in
208- {
218+ cpu_to_node ;
219+
220+ let * () =
221+ if Array. for_all (fun cpus -> CPUSet. is_empty cpus) node_cpus then (
222+ D. info
223+ " Not enabling NUMA: the ACPI SLIT only contains unreachable nodes." ;
224+ None
225+ ) else
226+ Some ()
227+ in
228+
229+ let numa_matrix_is_reasonable =
209230 distances
210- ; cpu_to_node= Array. map node_of_int cpu_to_node
211- ; node_cpus
212- ; all
213- ; node_usage= Array. map (fun _ -> 0 ) distances
214- ; candidates
215- }
231+ |> Array. to_seqi
232+ |> Seq. for_all (fun (i , row ) ->
233+ let d = distances.(i).(i) in
234+ (d = unreachable_distance || d = self_distance)
235+ && Array. for_all
236+ (fun d -> d > = self_distance || d = unreachable_distance)
237+ row
238+ )
239+ in
240+
241+ let * () =
242+ if not numa_matrix_is_reasonable then (
243+ D. info
244+ " Not enabling NUMA: the ACPI SLIT table contains values that are \
245+ invalid." ;
246+ None
247+ ) else
248+ Some ()
249+ in
250+
251+ let candidates = gen_candidates distances in
252+
253+ let all = Array. fold_left CPUSet. union CPUSet. empty node_cpus in
254+ Some
255+ {
256+ distances
257+ ; cpu_to_node= Array. map node_of_int cpu_to_node
258+ ; node_cpus
259+ ; all
260+ ; node_usage= Array. map (fun _ -> 0 ) distances
261+ ; candidates
262+ }
216263
217264 let distance t (Node a ) (Node b ) = t.distances.(a).(b)
218265
0 commit comments