@@ -731,6 +731,7 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
731731 profile .Spec .Config .ProviderName = providerName
732732 profile .Spec .Profile = computed .AllProfiles
733733 profile .Status .Conditions = tunedpkg .InitializeStatusConditions ()
734+ delete (c .pc .state .bootcmdline , nodeName ) // bootcmdline retrieved from node annotation is potentially stale, let it resync on node update
734735
735736 klog .V (2 ).Infof ("syncProfile(): updating Profile %s [%s]" , profile .Name , computed .TunedProfileName )
736737 _ , err = c .clients .Tuned .TunedV1 ().Profiles (ntoconfig .WatchNamespace ()).Update (context .TODO (), profile , metav1.UpdateOptions {})
@@ -780,18 +781,17 @@ func (c *Controller) syncMachineConfig(labels map[string]string, profile *tunedv
780781 name := GetMachineConfigNameForPools (pools )
781782 klog .V (2 ).Infof ("syncMachineConfig(): %v" , name )
782783
783- var bootcmdlineSet bool
784784 nodes , err := c .pc .getNodesForPool (pools [0 ])
785785 if err != nil {
786786 return err
787787 }
788788
789- bootcmdline , bootcmdlineSet = c .pc .state .bootcmdline [profile .Name ]
790- if ! bootcmdlineSet {
791- klog .V (2 ).Infof ("syncMachineConfig(): bootcmdline for %s not cached, sync canceled" , profile .Name )
789+ if ok := c .allNodesHaveBootcmdlineSet (nodes ); ! ok {
790+ klog .V (2 ).Infof ("syncMachineConfig(): bootcmdline for %s not cached for all nodes, sync canceled" , profile .Name )
792791 return nil
793792 }
794793
794+ bootcmdline = c .pc .state .bootcmdline [profile .Name ]
795795 if ok := c .allNodesAgreeOnBootcmdline (nodes ); ! ok {
796796 // Log an error and do not requeue, this is a configuration issue.
797797 klog .Errorf ("not all %d Nodes in MCP %v agree on bootcmdline: %s" , len (nodes ), pools [0 ].Name , bootcmdline )
@@ -848,6 +848,36 @@ func (c *Controller) syncMachineConfig(labels map[string]string, profile *tunedv
848848 return nil
849849}
850850
851+ // allNodesHaveBootcmdlineSet returns true if all Nodes in slice 'nodes' have
852+ // their bootcmdline annotation (TunedBootcmdlineAnnotationKey) value cached in
853+ // the profilecalculator's cache. The values in the cache are populated on Node
854+ // updates (regular ones or just resyncs) and removed from the cache on Profile
855+ // updates. Note this is not a bullet-proof solution to false bootcmdline
856+ // conflicts, because we can still have races, such as regular k8s object
857+ // Node resync (or other independent Node updates) right after Profile update.
858+ // While the cached bootcmdline value will be deleted after Profile update, it
859+ // can still be populated by a stale value from Node's annotation because of the
860+ // Node's resync (or other independent updates) before the proper bootcmdline
861+ // is calculated by the TuneD pod and Node's (TunedBootcmdlineAnnotationKey)
862+ // annotation updated. A bullet-proof solution would involve adding a new
863+ // annotation such as tuned.openshift.io/lastProfileObservedGen to the Node
864+ // tied to the Profile and comparing it to the Profile's generation prior to
865+ // updating a MachineConfig. However, we want to avoid putting extra load
866+ // on the API server as much as possible and this simpler solution already
867+ // significantly reduces false reports of bootcmdline conflicts.
868+ func (c * Controller ) allNodesHaveBootcmdlineSet (nodes []* corev1.Node ) bool {
869+ for _ , node := range nodes {
870+ if v , bootcmdlineSet := c .pc .state .bootcmdline [node .Name ]; ! bootcmdlineSet {
871+ klog .V (3 ).Infof ("allNodesHaveBootcmdlineSet(): bootcmdline not set for node %s" , node .Name )
872+ return false
873+ } else {
874+ klog .V (3 ).Infof ("allNodesHaveBootcmdlineSet(): bootcmdline %q set for node %s" , v , node .Name )
875+ }
876+ }
877+
878+ return true
879+ }
880+
851881// allNodesAgreeOnBootcmdline returns true if the current cached annotation 'TunedBootcmdlineAnnotationKey'
852882// of all Nodes in slice 'nodes' has the same value.
853883func (c * Controller ) allNodesAgreeOnBootcmdline (nodes []* corev1.Node ) bool {
@@ -881,6 +911,11 @@ func (c *Controller) syncMachineConfigHyperShift(nodePoolName string, profile *t
881911 return fmt .Errorf ("could not fetch a list of Nodes for NodePool %s: %v" , nodePoolName , err )
882912 }
883913
914+ if ok := c .allNodesHaveBootcmdlineSet (nodes ); ! ok {
915+ klog .V (2 ).Infof ("syncMachineConfigHyperShift(): bootcmdline for %s not cached for all nodes, sync canceled" , profile .Name )
916+ return nil
917+ }
918+
884919 bootcmdline := c .pc .state .bootcmdline [profile .Name ]
885920 if ok := c .allNodesAgreeOnBootcmdline (nodes ); ! ok {
886921 return fmt .Errorf ("not all %d Nodes in NodePool %v agree on bootcmdline: %s" , len (nodes ), nodePoolName , bootcmdline )
0 commit comments