@@ -104,29 +104,28 @@ let get_uuid_to_ip_mapping () =
104104 named by UUID. *)
105105let address_of_host_uuid uuid =
106106 let table = get_uuid_to_ip_mapping () in
107- if not (List. mem_assoc uuid table) then (
108- error " Failed to find the IP address of host UUID %s" uuid ;
109- raise Not_found
110- ) else
111- List. assoc uuid table
107+ List. assoc_opt uuid table |> Option. to_result ~none: Not_found
112108
113109(* * Without using the Pool's database, returns the UUID of a particular host named by
114110 heartbeat IP address. This is only necesary because the liveset info doesn't include
115111 the host IP address *)
116112let uuid_of_host_address address =
117113 let table = List. map (fun (k , v ) -> (v, k)) (get_uuid_to_ip_mapping () ) in
118- match List. assoc_opt address table with
119- | None ->
120- error " Failed to find the UUID address of host with address %s" address ;
121- raise Not_found
122- | Some uuid_str -> (
123- match Uuidx. of_string uuid_str with
124- | None ->
125- error " Failed parse UUID of host with address %s" address ;
126- raise (Invalid_argument " Invalid UUID" )
127- | Some uuid ->
128- uuid
129- )
114+ let invalid_uuid = Invalid_argument " Invalid UUID" in
115+ let to_uuid str =
116+ Uuidx. of_string str |> Option. to_result ~none: invalid_uuid
117+ in
118+ List. assoc_opt address table
119+ |> Option. to_result ~none: Not_found
120+ |> Fun. flip Result. bind to_uuid
121+
122+ let ok_or_raise map_error = function Ok v -> v | Error exn -> map_error exn
123+
124+ let master_address_exn __FUN e =
125+ let exn = Printexc. to_string e in
126+ let msg = Printf. sprintf " unable to gather the coordinator's IP: %s" exn in
127+ error " %s: %s" __FUN msg ;
128+ raise Api_errors. (Server_error (internal_error, [msg]))
130129
131130(* * Called in two circumstances:
132131 1. When I started up I thought I was the master but my proposal was rejected by the
@@ -145,7 +144,9 @@ let on_master_failure () =
145144 done
146145 in
147146 let become_slave_of uuid =
148- let address = address_of_host_uuid uuid in
147+ let address =
148+ address_of_host_uuid uuid |> ok_or_raise (master_address_exn __FUNCTION__)
149+ in
149150 info " This node will become the slave of host %s (%s)" uuid address ;
150151 Xapi_pool_transition. become_another_masters_slave address ;
151152 (* XXX CA-16388: prevent blocking *)
@@ -170,19 +171,17 @@ let on_master_failure () =
170171 " ha_can_not_be_master_on_next_boot set: I cannot be master; looking \
171172 for another master" ;
172173 let liveset = query_liveset () in
174+ let open Xha_interface.LiveSetInformation in
173175 match
174176 Hashtbl. fold
175177 (fun uuid host acc ->
176- if
177- host.Xha_interface.LiveSetInformation.Host. master
178- && host.Xha_interface.LiveSetInformation.Host. liveness
179- (* CP-25481: a dead host may still have the master lock *)
180- then
178+ (* CP-25481: a dead host may still have the master lock *)
179+ if host.Host. master && host.Host. liveness then
181180 uuid :: acc
182181 else
183182 acc
184183 )
185- liveset.Xha_interface.LiveSetInformation. hosts []
184+ liveset.hosts []
186185 with
187186 | [] ->
188187 info " no other master exists yet; waiting 5 seconds and retrying" ;
@@ -197,6 +196,18 @@ let on_master_failure () =
197196 )
198197 done
199198
199+ let master_uuid_exn __FUN e =
200+ let exn = Printexc. to_string e in
201+ let msg = Printf. sprintf " unable to gather the coordinator's UUID: %s" exn in
202+ error " %s: %s" __FUN msg ;
203+ raise Api_errors. (Server_error (internal_error, [msg]))
204+
205+ let master_not_in_liveset_exn __FUN e =
206+ let exn = Printexc. to_string e in
207+ let msg = Printf. sprintf " unable to gather the coordinator's info: %s" exn in
208+ error " %s: %s" __FUN msg ;
209+ raise Api_errors. (Server_error (internal_error, [msg]))
210+
200211module Timeouts = struct
201212 type t = {
202213 heart_beat_interval : int
@@ -463,16 +474,17 @@ module Monitor = struct
463474 (* WARNING: must not touch the database or perform blocking I/O *)
464475 let process_liveset_on_slave liveset =
465476 let address = Pool_role. get_master_address () in
466- let master_uuid = uuid_of_host_address address in
477+ let master_uuid =
478+ uuid_of_host_address address
479+ |> ok_or_raise (master_uuid_exn __FUNCTION__)
480+ in
481+ let open Xha_interface.LiveSetInformation in
467482 let master_info =
468- Hashtbl. find liveset.Xha_interface.LiveSetInformation. hosts
469- master_uuid
483+ Hashtbl. find_opt liveset.hosts master_uuid
484+ |> Option. to_result ~none: Not_found
485+ |> ok_or_raise (master_not_in_liveset_exn __FUNCTION__)
470486 in
471- if
472- true
473- && master_info.Xha_interface.LiveSetInformation.Host. liveness
474- && master_info.Xha_interface.LiveSetInformation.Host. master
475- then
487+ if master_info.Host. liveness && master_info.Host. master then
476488 debug
477489 " The node we think is the master is still alive and marked \
478490 as master; this is OK"
@@ -1389,6 +1401,7 @@ let preconfigure_host __context localhost statevdis metadata_vdi generation =
13891401 Localdb. put Constants. ha_base_t (string_of_int base_t)
13901402
13911403let join_liveset __context host =
1404+ let __FUN = __FUNCTION__ in
13921405 info " Host.ha_join_liveset host = %s" (Ref. string_of host) ;
13931406 ha_start_daemon () ;
13941407 Localdb. put Constants. ha_disable_failover_decisions " false" ;
@@ -1406,38 +1419,35 @@ let join_liveset __context host =
14061419 (* If this host is a slave then we must wait to confirm that the master manages to
14071420 assert itself, otherwise our monitoring thread might attempt a hostile takeover *)
14081421 let master_address = Pool_role. get_master_address () in
1409- let master_uuid = uuid_of_host_address master_address in
1422+ let master_uuid =
1423+ uuid_of_host_address master_address
1424+ |> ok_or_raise (master_uuid_exn __FUN)
1425+ in
14101426 let master_found = ref false in
14111427 while not ! master_found do
14121428 (* It takes a non-trivial amount of time for the master to assert itself: we might
14131429 as well wait here rather than enumerating all the if/then/else branches where we
14141430 should wait. *)
14151431 Thread. delay 5. ;
14161432 let liveset = query_liveset () in
1417- debug " Liveset: %s"
1418- (Xha_interface.LiveSetInformation. to_summary_string liveset) ;
1419- if
1420- liveset.Xha_interface.LiveSetInformation. status
1421- = Xha_interface.LiveSetInformation.Status. Online
1422- then
1433+ let open Xha_interface.LiveSetInformation in
1434+ debug " Liveset: %s" (to_summary_string liveset) ;
1435+ if liveset.status = Status. Online then
14231436 (* 'master' is the node we believe should become the xHA-level master initially *)
14241437 let master =
1425- Hashtbl. find liveset.Xha_interface.LiveSetInformation. hosts
1426- master_uuid
1438+ Hashtbl. find_opt liveset.hosts master_uuid
1439+ |> Option. to_result ~none: Not_found
1440+ |> ok_or_raise (master_not_in_liveset_exn __FUN)
14271441 in
1428- if master.Xha_interface.LiveSetInformation. Host. master then (
1442+ if master.Host. master then (
14291443 info " existing master has successfully asserted itself" ;
14301444 master_found := true (* loop will terminate *)
14311445 ) else if
14321446 false
1433- || (not master.Xha_interface.LiveSetInformation.Host. liveness)
1434- || master
1435- .Xha_interface.LiveSetInformation.Host. state_file_corrupted
1436- || (not
1437- master
1438- .Xha_interface.LiveSetInformation.Host. state_file_access
1439- )
1440- || master.Xha_interface.LiveSetInformation.Host. excluded
1447+ || (not master.Host. liveness)
1448+ || master.Host. state_file_corrupted
1449+ || (not master.Host. state_file_access)
1450+ || master.Host. excluded
14411451 then (
14421452 error " Existing master has failed during HA enable process" ;
14431453 failwith " Existing master failed during HA enable process"
0 commit comments