Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 141 additions & 0 deletions pkg/controller/node/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"reflect"
"sort"
"strings"
"time"

helpers "github.com/openshift/machine-config-operator/pkg/helpers"
Expand Down Expand Up @@ -265,6 +266,14 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) {
klog.Info("Starting MachineConfigController-NodeController")
defer klog.Info("Shutting down MachineConfigController-NodeController")

// TODO (MCO-1775): Once ImageModeStatusReporting has been GA for an entire release version
// (for example >=4.22.0), the migration logic below can be removed.
// When the feature gate is enabled, perform a one-time migration from the legacy
// MachineConfigNodeUpdateFilesAndOS condition to the new ImageModeStatusReporting conditions.
if ctrl.fgHandler.Enabled(features.FeatureGateImageModeStatusReporting) {
go ctrl.performImageModeStatusReportingConditionMigration()
}

for i := 0; i < workers; i++ {
go wait.Until(ctrl.worker, time.Second, stopCh)
}
Expand Down Expand Up @@ -1621,3 +1630,135 @@ func (ctrl *Controller) isConfigOrBuildPresent(mosc *mcfgv1.MachineOSConfig, mos
func (ctrl *Controller) isConfigAndBuildPresent(mosc *mcfgv1.MachineOSConfig, mosb *mcfgv1.MachineOSBuild) bool {
	// Both the MachineOSConfig and the MachineOSBuild must be non-nil.
	if mosc == nil {
		return false
	}
	return mosb != nil
}

// migrateMCNConditionsToImageModeStatusReporting migrates MCN condition formats from the legacy
// MachineConfigNodeUpdateFilesAndOS condition to the new ImageModeStatusReporting conditions
// (MachineConfigNodeUpdateFiles, MachineConfigNodeUpdateOS, and MachineConfigNodeImagePulledFromRegistry).
// Removes the legacy condition and adds the new conditions with appropriate default values.
// Returns the number of MCNs that had their conditions migrated.
func (ctrl *Controller) migrateMCNConditionsToImageModeStatusReporting() (int, error) {
// Get all MachineConfigNodes
mcns, err := ctrl.client.MachineconfigurationV1().MachineConfigNodes().List(context.TODO(), metav1.ListOptions{})
if err != nil {
return 0, fmt.Errorf("failed to list MachineConfigNodes for condition migration: %w", err)
}

// Loop through all the cluster's MCNs and migrate them if needed
migratedCount := 0
for _, mcn := range mcns.Items {
// Check if the legacy condition exists and filter it out
hasLegacyCondition := false
// If we need to clean the legacy condition, the new conditions should not exist, but we
// will check to fully prevent condition duplication
needsUpdateFiles := true
needsUpdateOS := true
needsImagePulledFromRegistry := true
var newConditions []metav1.Condition
for _, condition := range mcn.Status.Conditions {
//nolint:gocritic // (ijanssen) the linter thinks this block would be clearer as a switch statement, but I disagree
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 on your opinion

if condition.Type == string(mcfgv1.MachineConfigNodeUpdateFilesAndOS) {
hasLegacyCondition = true
klog.V(4).Infof("Removing legacy MachineConfigNodeUpdateFilesAndOS condition from MCN %s", mcn.Name)
continue // Skip adding this condition to the new list
} else if condition.Type == string(mcfgv1.MachineConfigNodeUpdateFiles) {
needsUpdateFiles = false
} else if condition.Type == string(mcfgv1.MachineConfigNodeUpdateOS) {
needsUpdateOS = false
} else if condition.Type == string(mcfgv1.MachineConfigNodeImagePulledFromRegistry) {
needsImagePulledFromRegistry = false
}

newConditions = append(newConditions, condition)
}

// Only update the MCN if we found and removed the legacy condition or if any new
// conditions need to be added
if hasLegacyCondition || needsUpdateOS || needsUpdateFiles || needsImagePulledFromRegistry {
// Add the new ImageModeStatusReporting conditions with default values only if they don't exist
now := metav1.Now()

if needsUpdateFiles {
defaultCondition := metav1.Condition{
Type: string(mcfgv1.MachineConfigNodeUpdateFiles),
Message: fmt.Sprintf("This node has not yet entered the %s phase", string(mcfgv1.MachineConfigNodeUpdateFiles)),
Reason: "NotYetOccurred",
LastTransitionTime: now,
Status: metav1.ConditionFalse,
}
newConditions = append(newConditions, defaultCondition)
}

if needsUpdateOS {
defaultCondition := metav1.Condition{
Type: string(mcfgv1.MachineConfigNodeUpdateOS),
Message: fmt.Sprintf("This node has not yet entered the %s phase", string(mcfgv1.MachineConfigNodeUpdateOS)),
Reason: "NotYetOccurred",
LastTransitionTime: now,
Status: metav1.ConditionFalse,
}
newConditions = append(newConditions, defaultCondition)
}

if needsImagePulledFromRegistry {
defaultCondition := metav1.Condition{
Type: string(mcfgv1.MachineConfigNodeImagePulledFromRegistry),
Message: fmt.Sprintf("This node has not yet entered the %s phase", string(mcfgv1.MachineConfigNodeImagePulledFromRegistry)),
Reason: "NotYetOccurred",
LastTransitionTime: now,
Status: metav1.ConditionFalse,
}
newConditions = append(newConditions, defaultCondition)
}

mcnCopy := mcn.DeepCopy()
mcnCopy.Status.Conditions = newConditions
_, err = ctrl.client.MachineconfigurationV1().MachineConfigNodes().UpdateStatus(context.TODO(), mcnCopy, metav1.UpdateOptions{})
if err != nil {
return migratedCount, fmt.Errorf("failed to update MCN %s during condition migration to ImageModeStatusReporting: %w", mcn.Name, err)
}
migratedCount++

// Create descriptive log message based on what was done
var actions []string
if hasLegacyCondition {
actions = append(actions, "removed legacy MachineConfigNodeUpdateFilesAndOS condition")
}
if needsUpdateFiles || needsUpdateOS || needsImagePulledFromRegistry {
var addedConditions []string
if needsUpdateFiles {
addedConditions = append(addedConditions, "MachineConfigNodeUpdateFiles")
}
if needsUpdateOS {
addedConditions = append(addedConditions, "MachineConfigNodeUpdateOS")
}
if needsImagePulledFromRegistry {
addedConditions = append(addedConditions, "MachineConfigNodeImagePulledFromRegistry")
}
actions = append(actions, fmt.Sprintf("added %s condition(s)", strings.Join(addedConditions, ", ")))
}

klog.Infof("Successfully migrated conditions for MCN %s: %s", mcn.Name, strings.Join(actions, " and "))
}
}

return migratedCount, nil
}

// performImageModeStatusReportingConditionMigration drives the migration of MCN conditions from
// the legacy MachineConfigNodeUpdateFilesAndOS condition to the ImageModeStatusReporting
// condition format and logs the outcome. It returns nothing; errors are only logged. The
// controller's Run method launches it in its own goroutine so that startup is not blocked.
func (ctrl *Controller) performImageModeStatusReportingConditionMigration() {
	klog.Info("Starting one-time MCN condition migration to ImageModeStatusReporting format")

	migrated, migrateErr := ctrl.migrateMCNConditionsToImageModeStatusReporting()

	// Exactly one outcome is reported: failure, work done, or nothing to do.
	switch {
	case migrateErr != nil:
		klog.Errorf("Failed to migrate MCN condition formats to ImageModeStatusReporting: %v", migrateErr)
	case migrated > 0:
		klog.Infof("Completed MCN condition migration to ImageModeStatusReporting format for %d total nodes", migrated)
	default:
		klog.Info("No MCN condition migration to ImageModeStatusReporting format required")
	}
}
176 changes: 149 additions & 27 deletions pkg/daemon/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -1095,30 +1095,87 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
files += f.Path + " "
}

// TODO (MCO-1775): Once ImageModeStatusReporting is GA, clean up the below logic. Updates to
// the `MachineConfigNodeUpdateFilesAndOS` condition will no longer be necessary and should be
// fully replaced by updates to the individual `MachineConfigNodeUpdateFiles` and
// `MachineConfigNodeUpdateOS` conditions.
imageModeStatusReportingEnabled := dn.fgHandler != nil && dn.fgHandler.Enabled(features.FeatureGateImageModeStatusReporting)
updatesNeeded := []string{"not", "not"}
if diff.passwd {
fileUpdate := false
osUpdate := false
// Handle file updates case
if diff.passwd || diff.files {
updatesNeeded[1] = ""
fileUpdate = true
// When ImageModeStatusReporting is enabled, use the specific `MachineConfigNodeUpdateFiles` condition for file updates
if imageModeStatusReportingEnabled {
err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateFiles), Message: fmt.Sprintf("Updating the Files on disk as a part of the in progress phase")},
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateFiles, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateFiles)), Message: fmt.Sprintf("Applying files to node.")},
metav1.ConditionUnknown,
metav1.ConditionUnknown,
dn.node,
dn.mcfgClient,
dn.fgHandler,
pool,
)
}
if err != nil {
klog.Errorf("Error making MCN for Updating Files: %v", err)
}
}
// Handle OS updates case
if diff.osUpdate || diff.extensions || diff.kernelType {
osUpdate = true
updatesNeeded[0] = ""
// When ImageModeStatusReporting is enabled, use the specific `MachineConfigNodeUpdateOS` condition for OS updates
if imageModeStatusReportingEnabled {
err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateOS), Message: fmt.Sprintf("Updating the OS on disk as a part of the in progress phase")},
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateOS, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateOS)), Message: fmt.Sprintf("Applying new OS config to node.")},
metav1.ConditionUnknown,
metav1.ConditionUnknown,
dn.node,
dn.mcfgClient,
dn.fgHandler,
pool,
)
}
if err != nil {
klog.Errorf("Error making MCN for Updating Files: %v", err)
}
}

err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateFilesAndOS), Message: fmt.Sprintf("Updating the Files and OS on disk as a part of the in progress phase")},
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateFilesAndOS, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateFilesAndOS)), Message: fmt.Sprintf("Applying files and new OS config to node. OS will %s need an update. SSH Keys will %s need an update", updatesNeeded[0], updatesNeeded[1])},
metav1.ConditionUnknown,
metav1.ConditionUnknown,
dn.node,
dn.mcfgClient,
dn.fgHandler,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Updating Files and OS: %v", err)
if !imageModeStatusReportingEnabled {
err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateFilesAndOS), Message: fmt.Sprintf("Updating the Files and OS on disk as a part of the in progress phase")},
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateFilesAndOS, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateFilesAndOS)), Message: fmt.Sprintf("Applying files and new OS config to node. OS will %s need an update. SSH Keys will %s need an update", updatesNeeded[0], updatesNeeded[1])},
metav1.ConditionUnknown,
metav1.ConditionUnknown,
dn.node,
dn.mcfgClient,
dn.fgHandler,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Updating OS: %v", err)
}
}

// update files on disk that need updating
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, skipCertificateWrite); err != nil {
// When ImageModeStatusReporting is enabled, update the `MachineConfigNodeUpdateFiles` condition to report the experienced error
if imageModeStatusReportingEnabled {
err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateFiles), Message: fmt.Sprintf("Error updating the Files on disk as a part of the in progress phase")},
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateFiles, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateFiles)), Message: fmt.Sprintf("Update failed applying files to node: %s", err.Error())},
metav1.ConditionUnknown,
metav1.ConditionUnknown,
dn.node,
dn.mcfgClient,
dn.fgHandler,
pool,
)
}
return err
}

Expand All @@ -1140,8 +1197,20 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
// only update passwd if it has changed (do not nullify)
// we do not need to include SetPasswordHash in this, since only updateSSHKeys has issues on firstboot.
if diff.passwd {
klog.Info("setting passwd")
if err := dn.updateSSHKeys(newIgnConfig.Passwd.Users, oldIgnConfig.Passwd.Users); err != nil {
// When ImageModeStatusReporting is enabled, update the `MachineConfigNodeUpdateFiles` condition to report the experienced error
if imageModeStatusReportingEnabled {
err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateFiles), Message: fmt.Sprintf("Error updating the Files on disk as a part of the in progress phase")},
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateFiles, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateFiles)), Message: fmt.Sprintf("Update failed applying files to node: %s", err.Error())},
metav1.ConditionUnknown,
metav1.ConditionUnknown,
dn.node,
dn.mcfgClient,
dn.fgHandler,
pool,
)
}
return err
}

Expand Down Expand Up @@ -1179,6 +1248,19 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
}

if err := coreOSDaemon.applyOSChanges(*diff, oldConfig, newConfig); err != nil {
// When ImageModeStatusReporting is enabled, update the `MachineConfigNodeUpdateOS` condition to report the experienced error
if imageModeStatusReportingEnabled {
err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateOS), Message: fmt.Sprintf("Error the OS on disk as a part of the in progress phase")},
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateOS, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateOS)), Message: fmt.Sprintf("Update failed applying new OS config to node: %s", err.Error())},
metav1.ConditionUnknown,
metav1.ConditionUnknown,
dn.node,
dn.mcfgClient,
dn.fgHandler,
pool,
)
}
return err
}

Expand Down Expand Up @@ -1225,19 +1307,59 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
}
}()

err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateFilesAndOS), Message: fmt.Sprintf("Updated the Files and OS on disk as a part of the in progress phase")},
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateFilesAndOS, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateFilesAndOS)), Message: fmt.Sprintf("Applied files and new OS config to node. OS did %s need an update. SSH Keys did %s need an update", updatesNeeded[0], updatesNeeded[1])},
metav1.ConditionTrue,
metav1.ConditionTrue,
dn.node,
dn.mcfgClient,
dn.fgHandler,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Updated Files and OS: %v", err)
// TODO (MCO-1775): Once ImageModeStatusReporting is GA, clean up the below logic. Updates to
// the `MachineConfigNodeUpdateFilesAndOS` condition will no longer be necessary and should be
// fully replaced by updates to the individual `MachineConfigNodeUpdateFiles` and
// `MachineConfigNodeUpdateOS` conditions.
if imageModeStatusReportingEnabled {
// Update MCN for successful file update
if fileUpdate {
err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateFiles), Message: fmt.Sprintf("Updated the Files on disk as a part of the in progress phase")},
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateFiles, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateFiles)), Message: fmt.Sprintf("Applied files. SSH Keys did need an update")},
metav1.ConditionTrue,
metav1.ConditionTrue,
dn.node,
dn.mcfgClient,
dn.fgHandler,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Updated Files: %v", err)
}
}
// Update MCN for successful OS update
if osUpdate {
err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateOS), Message: fmt.Sprintf("Updated the OS on disk as a part of the in progress phase")},
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateOS, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateOS)), Message: fmt.Sprintf("Applied new OS config to node.")},
metav1.ConditionTrue,
metav1.ConditionTrue,
dn.node,
dn.mcfgClient,
dn.fgHandler,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Updated OS: %v", err)
}
}
} else {
err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateFilesAndOS), Message: fmt.Sprintf("Updated the Files and OS on disk as a part of the in progress phase")},
&upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateFilesAndOS, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateFilesAndOS)), Message: fmt.Sprintf("Applied files and new OS config to node. OS did %s need an update. SSH Keys did %s need an update", updatesNeeded[0], updatesNeeded[1])},
metav1.ConditionTrue,
metav1.ConditionTrue,
dn.node,
dn.mcfgClient,
dn.fgHandler,
pool,
)
if err != nil {
klog.Errorf("Error making MCN for Updated Files and OS: %v", err)
}
}

// Node Disruption Policies cannot be used during firstboot as API is not accessible.
if !firstBoot {
return dn.performPostConfigChangeNodeDisruptionAction(nodeDisruptionActions, newConfig.GetName())
Expand Down
Loading