Skip to content

Commit 99fd22d

Browse files
authored
CA-416959 Copy bonds and sriov from master using device position (#6669)
For pool join procedure, the slave host will go through 1 pre_join_check 2 update_non_vm_metadata: in this step, create pif objects for physical pifs of slave host on master host db. 3 on slave xapi start: copy_bonds_from_master, copy_network_sriovs_from_master, copy_vlans_from_master, copy_tunnels_from_master This issue happens on step 3 when copy bonds from master. for every bond on master - get the bond master pif on master - get the bond slave pifs on master - confirm the primary bond slaves via comparing the bond master pif mac - find the corresponding pifs on slave host - decide if create bond on slave host In `find the corresponding pifs on slave host`, the previous method is to compare the pif.device on master and slave hosts. Now we should compare the position of pif device. Because the network device name is no longer renamed to ethx, which x is actually the position. It's similar for copy_network_sriovs_from_master. For vlans and tunnels, they are based the underneath network, no pif.device compare. So, this issue doesn't exist.
2 parents 1cd018b + aedacd2 commit 99fd22d

File tree

3 files changed

+76
-52
lines changed

3 files changed

+76
-52
lines changed

ocaml/xapi/sync_networking.ml

Lines changed: 68 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,20 @@ let fix_bonds ~__context () =
5454
in
5555
List.iter (fun bond -> Xapi_bond.fix_bond ~__context ~bond) my_bonds
5656

57+
let get_my_physical_pifs_with_position ~__context =
58+
let me = !Xapi_globs.localhost_ref in
59+
Db.PIF.get_records_where ~__context
60+
~expr:
61+
(And
62+
( Eq (Field "host", Literal (Ref.string_of me))
63+
, Eq (Field "physical", Literal "true")
64+
)
65+
)
66+
|> List.filter_map (fun (pif_ref, pif_rec) ->
67+
Xapi_pif_helpers.get_pif_position ~__context ~pif_rec
68+
|> Option.map (fun position -> (pif_ref, pif_rec, position))
69+
)
70+
5771
(** Copy Bonds from master *)
5872
let copy_bonds_from_master ~__context () =
5973
(* if slave: then inherit network config (bonds and vlans) from master (if we don't already have them) *)
@@ -84,26 +98,21 @@ let copy_bonds_from_master ~__context () =
8498
)
8599
)
86100
in
87-
let my_phy_pifs =
88-
Db.PIF.get_records_where ~__context
89-
~expr:
90-
(And
91-
( Eq (Field "host", Literal (Ref.string_of me))
92-
, Eq (Field "physical", Literal "true")
93-
)
94-
)
101+
let my_physical_pifs_with_position =
102+
get_my_physical_pifs_with_position ~__context
95103
in
96104
(* Consider Bonds *)
97105
debug "Resynchronising bonds" ;
98106
let maybe_create_bond_for_me bond =
99107
let network = Db.PIF.get_network ~__context ~self:bond.API.bond_master in
100-
let slaves_to_mac_and_device_map =
101-
List.map
108+
let slaves_to_mac_device_and_position_map =
109+
List.filter_map
102110
(fun self ->
103-
( self
104-
, Db.PIF.get_MAC ~__context ~self
105-
, Db.PIF.get_device ~__context ~self
106-
)
111+
let pif_rec = Db.PIF.get_record ~__context ~self in
112+
Xapi_pif_helpers.get_pif_position ~__context ~pif_rec
113+
|> Option.map (fun position ->
114+
(self, pif_rec.API.pIF_MAC, pif_rec.API.pIF_device, position)
115+
)
107116
)
108117
bond.API.bond_slaves
109118
in
@@ -119,50 +128,55 @@ let copy_bonds_from_master ~__context () =
119128
let master_slaves_with_same_mac_as_bond
120129
(* expecting a list of at most 1 here *) =
121130
List.filter
122-
(fun (_, mac, _) -> mac = master_bond_mac)
123-
slaves_to_mac_and_device_map
131+
(fun (_, mac, _, _) -> mac = master_bond_mac)
132+
slaves_to_mac_device_and_position_map
124133
in
125134
(* This tells us the device that the master used to inherit the bond's MAC address
126135
* (if indeed that is what it did; we set it to None if we think it didn't do this) *)
127-
let device_of_primary_slave =
136+
let position_of_primary_slave =
128137
match master_slaves_with_same_mac_as_bond with
129138
| [] ->
130139
None
131-
| [(_, _, device)] ->
132-
debug "Master bond has MAC address derived from %s" device ;
140+
| [(_, _, device, position)] ->
141+
debug "Master bond has MAC address derived from %s, position %d"
142+
device position ;
133143
(* found single slave with mac matching bond master =>
134144
* this was one that we inherited mac from *)
135-
Some device
145+
Some position
136146
| _ ->
137147
None
138148
in
139149
(* Look at the master's slaves and find the corresponding slave PIFs. Note that the slave
140150
* might not have the necessary devices: in this case we'll try to make partial bonds *)
141-
let slave_devices =
142-
List.map (fun (_, _, device) -> device) slaves_to_mac_and_device_map
151+
let slave_positions =
152+
List.map
153+
(fun (_, _, _, position) -> position)
154+
slaves_to_mac_device_and_position_map
143155
in
144-
let my_slave_pifs =
156+
let my_slave_pifs_with_position =
145157
List.filter
146-
(fun (_, pif) -> List.mem pif.API.pIF_device slave_devices)
147-
my_phy_pifs
158+
(fun (_, _, position) -> List.mem position slave_positions)
159+
my_physical_pifs_with_position
160+
in
161+
let my_slave_pif_refs =
162+
List.map (fun (pif_ref, _, _) -> pif_ref) my_slave_pifs_with_position
148163
in
149-
let my_slave_pif_refs = List.map fst my_slave_pifs in
150164
(* Do I have a pif that I should treat as a primary pif -
151165
* i.e. the one to inherit the MAC address from on my bond create? *)
152166
let my_primary_slave =
153-
match device_of_primary_slave with
167+
match position_of_primary_slave with
154168
| None ->
155169
None
156170
(* don't care cos we couldn't even figure out who master's primary slave was *)
157171
| Some master_primary -> (
158172
match
159173
List.filter
160-
(fun (_, pif) -> pif.API.pIF_device = master_primary)
161-
my_slave_pifs
174+
(fun (_, _, position) -> position = master_primary)
175+
my_slave_pifs_with_position
162176
with
163177
| [] ->
164178
None
165-
| [(pifref, _)] ->
179+
| [(pifref, _, _)] ->
166180
debug
167181
"I have found a PIF to use as primary bond slave (will inherit \
168182
MAC address of bond from this PIF)." ;
@@ -185,7 +199,7 @@ let copy_bonds_from_master ~__context () =
185199
in
186200
match
187201
( List.filter (fun (_, pif) -> pif.API.pIF_network = network) my_bond_pifs
188-
, my_slave_pifs
202+
, my_slave_pifs_with_position
189203
)
190204
with
191205
| [], [] ->
@@ -266,6 +280,7 @@ let copy_tunnels_from_master ~__context () =
266280
let copy_network_sriovs_from_master ~__context () =
267281
let me = !Xapi_globs.localhost_ref in
268282
let master = Helpers.get_master ~__context in
283+
let ( let& ) o f = Option.iter f o in
269284
let master_sriov_pifs =
270285
Db.PIF.get_records_where ~__context
271286
~expr:
@@ -284,14 +299,8 @@ let copy_network_sriovs_from_master ~__context () =
284299
)
285300
)
286301
in
287-
let my_physical_pifs =
288-
Db.PIF.get_records_where ~__context
289-
~expr:
290-
(And
291-
( Eq (Field "host", Literal (Ref.string_of me))
292-
, Eq (Field "physical", Literal "true")
293-
)
294-
)
302+
let my_physical_pifs_with_position =
303+
get_my_physical_pifs_with_position ~__context
295304
in
296305
debug "Resynchronising network-sriovs" ;
297306
let maybe_create_sriov_for_me (_, master_pif_rec) =
@@ -302,20 +311,34 @@ let copy_network_sriovs_from_master ~__context () =
302311
my_sriov_pifs
303312
in
304313
if existing_pif = [] then
305-
let device = master_pif_rec.API.pIF_device in
314+
let& master_sriov_physical_pif =
315+
match
316+
Xapi_pif_helpers.get_pif_topo ~__context ~pif_rec:master_pif_rec
317+
with
318+
| Network_sriov_logical _ :: Physical physical_pif :: _ ->
319+
Some physical_pif
320+
| _ ->
321+
None
322+
in
323+
let& position =
324+
Xapi_pif_helpers.get_pif_position ~__context
325+
~pif_rec:master_sriov_physical_pif
326+
in
306327
let pifs =
307328
List.filter
308-
(fun (_, pif_rec) -> pif_rec.API.pIF_device = device)
309-
my_physical_pifs
329+
(fun (_, _, pos) -> pos = position)
330+
my_physical_pifs_with_position
310331
in
311332
match pifs with
312333
| [] ->
313334
info
314335
"Cannot sync network sriov because cannot find PIF whose device \
315-
name is %s"
316-
device
317-
| (pif_ref, pif_rec) :: _ -> (
336+
position is %d"
337+
position
338+
| (pif_ref, pif_rec, _) :: _ -> (
318339
try
340+
debug "Syncing network sriov for PIF %s position %d"
341+
pif_rec.API.pIF_uuid position ;
319342
Xapi_network_sriov.create ~__context ~pif:pif_ref
320343
~network:sriov_network
321344
|> ignore

ocaml/xapi/xapi_pif.ml

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,6 @@ let bridge_naming_convention (device : string) (pos_opt : int option) =
4545
| None ->
4646
"br" ^ device
4747

48-
let n_of_xenbrn_opt bridge =
49-
try Scanf.sscanf bridge "xenbr%d%!" Option.some with _ -> None
50-
5148
type tables = {
5249
device_to_position_table: (string * int) list
5350
; device_to_mac_table: (string * string) list
@@ -81,10 +78,7 @@ let get_physical_pif_device ~__context ~interface_tables ~pif_rec =
8178
)
8279
in
8380
if pif_rec.API.pIF_physical then (
84-
let bridge =
85-
Db.Network.get_bridge ~__context ~self:pif_rec.API.pIF_network
86-
in
87-
match n_of_xenbrn_opt bridge with
81+
match Xapi_pif_helpers.get_pif_position ~__context ~pif_rec with
8882
| Some position ->
8983
find_name_by_position position pif_rec.API.pIF_device
9084
| None ->

ocaml/xapi/xapi_pif_helpers.ml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,3 +265,10 @@ let get_primary_address ~__context ~pif =
265265
)
266266
| `IPv6 ->
267267
List.nth_opt (get_non_link_ipv6 ~__context ~pif) 0
268+
269+
let get_pif_position ~__context ~pif_rec =
270+
let n_of_xenbrn_opt bridge =
271+
try Scanf.sscanf bridge "xenbr%d%!" Option.some with _ -> None
272+
in
273+
let bridge = Db.Network.get_bridge ~__context ~self:pif_rec.API.pIF_network in
274+
n_of_xenbrn_opt bridge

0 commit comments

Comments
 (0)