Skip to content

Commit 9a5bf59

Browse files
committed
add timeout to cloud init progress monitoring
Signed-off-by: olalekan odukoya <odukoyaonline@gmail.com>
1 parent d4f4911 commit 9a5bf59

File tree

2 files changed

+68
-49
lines changed

2 files changed

+68
-49
lines changed

pkg/hostagent/hostagent.go

Lines changed: 67 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ type HostAgent struct {
7676
guestAgentAliveChOnce sync.Once
7777

7878
showProgress bool // whether to show cloud-init progress
79+
80+
statusMu sync.RWMutex
81+
currentStatus events.Status
7982
}
8083

8184
type options struct {
@@ -284,6 +287,11 @@ func determineSSHLocalPort(confLocalPort int, instName, limaVersion string) (int
284287
func (a *HostAgent) emitEvent(_ context.Context, ev events.Event) {
285288
a.eventEncMu.Lock()
286289
defer a.eventEncMu.Unlock()
290+
291+
a.statusMu.Lock()
292+
a.currentStatus = ev.Status
293+
a.statusMu.Unlock()
294+
287295
if ev.Time.IsZero() {
288296
ev.Time = time.Now()
289297
}
@@ -292,6 +300,17 @@ func (a *HostAgent) emitEvent(_ context.Context, ev events.Event) {
292300
}
293301
}
294302

303+
// emitCloudInitProgressEvent emits an event that carries the given cloud-init
// progress. The event's Status is a copy of a.currentStatus with its
// CloudInitProgress field set to progress; the event is then handed to
// a.emitEvent together with ctx.
func (a *HostAgent) emitCloudInitProgressEvent(ctx context.Context, progress *events.CloudInitProgress) {
304+
// Snapshot the status under the read lock and release it before calling
// emitEvent, because emitEvent acquires statusMu's write lock itself.
a.statusMu.RLock()
305+
// Assigning the struct value copies it.
// NOTE(review): any reference-typed fields inside events.Status would still
// be shared with a.currentStatus — confirm against the events package.
currentStatus := a.currentStatus
306+
a.statusMu.RUnlock()
307+
308+
currentStatus.CloudInitProgress = progress
309+
310+
ev := events.Event{Status: currentStatus}
311+
a.emitEvent(ctx, ev)
312+
}
313+
295314
func generatePassword(length int) (string, error) {
296315
// avoid any special symbols, to make it easier to copy/paste
297316
return password.Generate(length, length/4, 0, false, false)
@@ -509,7 +528,10 @@ sudo chown -R "${USER}" /run/host-services`
509528
if a.showProgress {
510529
cloudInitDone := make(chan struct{})
511530
go func() {
512-
a.watchCloudInitProgress(ctx)
531+
timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Minute)
532+
defer cancel()
533+
534+
a.watchCloudInitProgress(timeoutCtx)
513535
close(cloudInitDone)
514536
}()
515537

@@ -816,15 +838,21 @@ func forwardSSH(ctx context.Context, sshConfig *ssh.SSHConfig, port int, local,
816838
}
817839

818840
func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
841+
exitReason := "Cloud-init monitoring completed successfully"
842+
843+
defer func() {
844+
a.emitCloudInitProgressEvent(context.Background(), &events.CloudInitProgress{
845+
Active: false,
846+
Completed: true,
847+
LogLine: exitReason,
848+
})
849+
logrus.Debug("Cloud-init progress monitoring completed")
850+
}()
851+
819852
logrus.Debug("Starting cloud-init progress monitoring")
820853

821-
a.emitEvent(ctx, events.Event{
822-
Status: events.Status{
823-
SSHLocalPort: a.sshLocalPort,
824-
CloudInitProgress: &events.CloudInitProgress{
825-
Active: true,
826-
},
827-
},
854+
a.emitCloudInitProgressEvent(ctx, &events.CloudInitProgress{
855+
Active: true,
828856
})
829857

830858
maxRetries := 30
@@ -866,11 +894,13 @@ func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
866894
stdout, err := cmd.StdoutPipe()
867895
if err != nil {
868896
logrus.WithError(err).Warn("Failed to create stdout pipe for cloud-init monitoring")
897+
exitReason = "Failed to create stdout pipe for cloud-init monitoring"
869898
return
870899
}
871900

872901
if err := cmd.Start(); err != nil {
873902
logrus.WithError(err).Warn("Failed to start cloud-init monitoring command")
903+
exitReason = "Failed to start cloud-init monitoring command"
874904
return
875905
}
876906

@@ -883,23 +913,31 @@ func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
883913
continue
884914
}
885915

886-
if strings.Contains(line, "Cloud-init") && strings.Contains(line, "finished") {
887-
cloudInitFinished = true
916+
if !cloudInitFinished {
917+
if isCloudInitFinished(line) {
918+
logrus.Debug("Cloud-init completion detected via log pattern")
919+
cloudInitFinished = true
920+
}
888921
}
889922

890-
a.emitEvent(ctx, events.Event{
891-
Status: events.Status{
892-
SSHLocalPort: a.sshLocalPort,
893-
CloudInitProgress: &events.CloudInitProgress{
894-
Active: !cloudInitFinished,
895-
LogLine: line,
896-
Completed: cloudInitFinished,
897-
},
898-
},
923+
a.emitCloudInitProgressEvent(ctx, &events.CloudInitProgress{
924+
Active: !cloudInitFinished,
925+
LogLine: line,
926+
Completed: cloudInitFinished,
899927
})
928+
929+
if cloudInitFinished {
930+
logrus.Debug("Breaking from cloud-init monitoring loop - completion detected")
931+
break
932+
}
900933
}
901934

902935
if err := cmd.Wait(); err != nil {
936+
if ctx.Err() == context.DeadlineExceeded {
937+
logrus.Warn("Cloud-init monitoring timed out after 10 minutes")
938+
exitReason = "Cloud-init monitoring timed out after 10 minutes"
939+
return
940+
}
903941
logrus.WithError(err).Debug("SSH command finished (expected when cloud-init completes)")
904942
}
905943

@@ -918,36 +956,24 @@ func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
918956
lines := strings.Split(string(finalOutput), "\n")
919957
for _, line := range lines {
920958
if strings.TrimSpace(line) != "" {
921-
if strings.Contains(line, "Cloud-init") && strings.Contains(line, "finished") {
922-
cloudInitFinished = true
959+
if !cloudInitFinished {
960+
cloudInitFinished = isCloudInitFinished(line)
923961
}
924962

925-
a.emitEvent(ctx, events.Event{
926-
Status: events.Status{
927-
SSHLocalPort: a.sshLocalPort,
928-
CloudInitProgress: &events.CloudInitProgress{
929-
Active: !cloudInitFinished,
930-
LogLine: line,
931-
Completed: cloudInitFinished,
932-
},
933-
},
963+
a.emitCloudInitProgressEvent(ctx, &events.CloudInitProgress{
964+
Active: !cloudInitFinished,
965+
LogLine: line,
966+
Completed: cloudInitFinished,
934967
})
935968
}
936969
}
937970
}
938971
}
972+
}
939973

940-
a.emitEvent(ctx, events.Event{
941-
Status: events.Status{
942-
SSHLocalPort: a.sshLocalPort,
943-
CloudInitProgress: &events.CloudInitProgress{
944-
Active: false,
945-
Completed: true,
946-
},
947-
},
948-
})
949-
950-
logrus.Debug("Cloud-init progress monitoring completed")
974+
// isCloudInitFinished reports whether line contains both "cloud-init" and
// "finished". The line is trimmed and lower-cased before the check, so the
// match is case-insensitive.
func isCloudInitFinished(line string) bool {
975+
line = strings.ToLower(strings.TrimSpace(line))
976+
return strings.Contains(line, "cloud-init") && strings.Contains(line, "finished")
951977
}
952978

953979
func copyToHost(ctx context.Context, sshConfig *ssh.SSHConfig, port int, local, remote string) error {

pkg/instance/start.go

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,6 @@ func watchHostAgentEvents(ctx context.Context, inst *store.Instance, haStdoutPat
307307
var (
308308
printedSSHLocalPort bool
309309
receivedRunningEvent bool
310-
cloudInitCompleted bool
311310
err error
312311
)
313312

@@ -331,10 +330,9 @@ func watchHostAgentEvents(ctx context.Context, inst *store.Instance, haStdoutPat
331330
}
332331

333332
if progress.Completed {
334-
cloudInitCompleted = true
333+
logrus.Infof("Cloud-init progress monitoring done.")
335334
}
336335
}
337-
338336
if len(ev.Status.Errors) > 0 {
339337
logrus.Errorf("%+v", ev.Status.Errors)
340338
}
@@ -354,11 +352,6 @@ func watchHostAgentEvents(ctx context.Context, inst *store.Instance, haStdoutPat
354352
return true
355353
}
356354

357-
if showProgress && !cloudInitCompleted {
358-
logrus.Infof("VM is running, waiting for cloud-init to complete...")
359-
return false
360-
}
361-
362355
if *inst.Config.Plain {
363356
logrus.Infof("READY. Run `ssh -F %q %s` to open the shell.", inst.SSHConfigFile, inst.Hostname)
364357
} else {

0 commit comments

Comments
 (0)