From 9c1183f9460d1f395097ed13e55a9c3e23b5e24c Mon Sep 17 00:00:00 2001 From: Diego Ciangottini Date: Fri, 31 Oct 2025 12:23:01 +0100 Subject: [PATCH 1/9] Add wait time for cluster readiness in main.go Added a sleep period to ensure the cluster is ready before proceeding with CSR approval and tests. Signed-off-by: Diego Ciangottini --- ci/main.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/main.go b/ci/main.go index 310c5943..49800e27 100644 --- a/ci/main.go +++ b/ci/main.go @@ -858,6 +858,8 @@ func (m *Interlink) Test( return nil, err } + time.Sleep(60 * time.Second) // wait for cluster to be ready + // Automate CSR approval for testing - required for mTLS functionality and log access c = c.WithExec([]string{"bash", "-c", "kubectl get csr -o name | xargs -r kubectl certificate approve"}) @@ -900,6 +902,9 @@ func (m *Interlink) TestMTLS( if err != nil { return nil, err } + + time.Sleep(60 * time.Second) // wait for cluster to be ready + // Automate CSR approval for testing - required for mTLS functionality and log access c = c.WithExec([]string{"bash", "-c", "kubectl get csr -o name | xargs -r kubectl certificate approve"}) From d927f137307768bdf110413a9668b7d8354d2bb1 Mon Sep 17 00:00:00 2001 From: Diego Ciangottini Date: Thu, 6 Nov 2025 09:49:08 +0000 Subject: [PATCH 2/9] update dagger version --- .github/workflows/ci.yaml | 2 +- ci/dagger.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 242dc1cb..67fb3ea3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -38,4 +38,4 @@ jobs: verb: call args: -s --name slurm-test-mtls build-images new-interlink-mtls test stdout cloud-token: ${{ secrets.DAGGER_CLOUD_TOKEN }} - version: "0.19.2" + version: "0.19.4" diff --git a/ci/dagger.json b/ci/dagger.json index a35ba4d3..646166be 100644 --- a/ci/dagger.json +++ b/ci/dagger.json @@ -1,6 +1,6 @@ { "name": "interlink", - "engineVersion": "v0.19.2", + 
"engineVersion": "v0.19.4", "sdk": { "source": "go" }, From 36115b558a8ed7039bbdb1e1eb5ee4c42c575be7 Mon Sep 17 00:00:00 2001 From: Diego Ciangottini Date: Thu, 6 Nov 2025 13:50:05 +0000 Subject: [PATCH 3/9] update slurm plugin to 0.5.3-pre1 --- plugins/slurm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/slurm b/plugins/slurm index e2a1cab1..666272ac 160000 --- a/plugins/slurm +++ b/plugins/slurm @@ -1 +1 @@ -Subproject commit e2a1cab131bf1e040d81e903dbdb3c0d2d578c81 +Subproject commit 666272ac088b832bcb1ca26d5d834cae70390572 From 058148d02583f69cdf7f1aa5bd5e984a539fc425 Mon Sep 17 00:00:00 2001 From: Diego Ciangottini Date: Thu, 6 Nov 2025 14:32:53 +0000 Subject: [PATCH 4/9] add wait for node to be ready --- ci/main.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/main.go b/ci/main.go index 49800e27..337e7c34 100644 --- a/ci/main.go +++ b/ci/main.go @@ -858,11 +858,12 @@ func (m *Interlink) Test( return nil, err } - time.Sleep(60 * time.Second) // wait for cluster to be ready - // Automate CSR approval for testing - required for mTLS functionality and log access c = c.WithExec([]string{"bash", "-c", "kubectl get csr -o name | xargs -r kubectl certificate approve"}) + // Wait for virtual-kubelet node to be ready before running tests + c = c.WithExec([]string{"kubectl", "wait", "--for=condition=Ready", "node/virtual-kubelet", "--timeout=300s"}) + result := c.WithExec([]string{"bash", "-c", "source .venv/bin/activate && export KUBECONFIG=/.kube/config && pytest -vk 'not rclone and not limits'"}) //_ = c.WithExec([]string{"bash", "-c", "source .venv/bin/activate && export KUBECONFIG=/.kube/config && pytest -vk 'hello'"}) // result := c.WithExec([]string{"bash", "-c", "source .venv/bin/activate && export KUBECONFIG=/.kube/config && pytest -vk 'hello'"}) @@ -903,15 +904,14 @@ func (m *Interlink) TestMTLS( return nil, err } - time.Sleep(60 * time.Second) // wait for cluster to be ready - // Automate CSR 
approval for testing - required for mTLS functionality and log access c = c.WithExec([]string{"bash", "-c", "kubectl get csr -o name | xargs -r kubectl certificate approve"}) + // Wait for virtual-kubelet node to be ready before running tests + c = c.WithExec([]string{"kubectl", "wait", "--for=condition=Ready", "node/virtual-kubelet", "--timeout=300s"}) + // First run basic tests to ensure setup works result := c.WithExec([]string{"bash", "-c", "source .venv/bin/activate && export KUBECONFIG=/.kube/config && pytest -v -k 'hello'"}). - // Wait for virtual node to be ready - WithExec([]string{"bash", "-c", "kubectl wait --for=condition=Ready node/virtual-kubelet --timeout=300s"}). // Automate CSR approval for testing - required for mTLS functionality WithExec([]string{"bash", "-c", "kubectl get csr -o name | xargs -r kubectl certificate approve"}). // Create a test pod for getLogs testing From 825104e8fefd593b1b42a0ccb95452534802853a Mon Sep 17 00:00:00 2001 From: Diego Ciangottini Date: Thu, 6 Nov 2025 14:34:40 +0000 Subject: [PATCH 5/9] add 19.4 for all workflows --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 67fb3ea3..71cb38d5 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -22,7 +22,7 @@ jobs: verb: call args: -s --name unit-tests unit-test cloud-token: ${{ secrets.DAGGER_CLOUD_TOKEN }} - version: "0.19.2" + version: "0.19.4" - name: Integration Test uses: dagger/dagger-for-github@v7 with: @@ -30,7 +30,7 @@ jobs: verb: call args: -s --name slurm-test build-images new-interlink test stdout cloud-token: ${{ secrets.DAGGER_CLOUD_TOKEN }} - version: "0.19.2" + version: "0.19.4" - name: Integration Test mTLS uses: dagger/dagger-for-github@v7 with: From fcd5ca8ed284809a425f0ce207fd15e9fca32a97 Mon Sep 17 00:00:00 2001 From: Diego Ciangottini Date: Thu, 6 Nov 2025 14:49:16 +0000 Subject: [PATCH 6/9] robust get node CI --- ci/main.go 
| 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/ci/main.go b/ci/main.go index 337e7c34..b14539e9 100644 --- a/ci/main.go +++ b/ci/main.go @@ -861,8 +861,17 @@ func (m *Interlink) Test( // Automate CSR approval for testing - required for mTLS functionality and log access c = c.WithExec([]string{"bash", "-c", "kubectl get csr -o name | xargs -r kubectl certificate approve"}) - // Wait for virtual-kubelet node to be ready before running tests - c = c.WithExec([]string{"kubectl", "wait", "--for=condition=Ready", "node/virtual-kubelet", "--timeout=300s"}) + // Wait for virtual-kubelet node to be ready before running tests (robust version that waits for node creation) + c = c.WithExec([]string{"bash", "-c", ` +for i in {1..60}; do + if kubectl get node virtual-kubelet &>/dev/null; then + echo "Virtual-kubelet node found, waiting for Ready condition..." + kubectl wait --for=condition=Ready node/virtual-kubelet --timeout=240s && break + fi + echo "Waiting for virtual-kubelet node to be created... 
($i/60)" + sleep 5 +done +`}) result := c.WithExec([]string{"bash", "-c", "source .venv/bin/activate && export KUBECONFIG=/.kube/config && pytest -vk 'not rclone and not limits'"}) //_ = c.WithExec([]string{"bash", "-c", "source .venv/bin/activate && export KUBECONFIG=/.kube/config && pytest -vk 'hello'"}) @@ -907,8 +916,17 @@ func (m *Interlink) TestMTLS( // Automate CSR approval for testing - required for mTLS functionality and log access c = c.WithExec([]string{"bash", "-c", "kubectl get csr -o name | xargs -r kubectl certificate approve"}) - // Wait for virtual-kubelet node to be ready before running tests - c = c.WithExec([]string{"kubectl", "wait", "--for=condition=Ready", "node/virtual-kubelet", "--timeout=300s"}) + // Wait for virtual-kubelet node to be ready before running tests (robust version that waits for node creation) + c = c.WithExec([]string{"bash", "-c", ` +for i in {1..60}; do + if kubectl get node virtual-kubelet &>/dev/null; then + echo "Virtual-kubelet node found, waiting for Ready condition..." + kubectl wait --for=condition=Ready node/virtual-kubelet --timeout=240s && break + fi + echo "Waiting for virtual-kubelet node to be created... ($i/60)" + sleep 5 +done +`}) // First run basic tests to ensure setup works result := c.WithExec([]string{"bash", "-c", "source .venv/bin/activate && export KUBECONFIG=/.kube/config && pytest -v -k 'hello'"}). 
From 730a7161839f3b04525d3cb5b4be303b97d80b1a Mon Sep 17 00:00:00 2001 From: Diego Ciangottini Date: Thu, 6 Nov 2025 15:43:52 +0000 Subject: [PATCH 7/9] fix timeout --- ci/go.mod | 4 ++-- ci/go.sum | 8 ++++---- ci/main.go | 33 ++++++++++++++++++--------------- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/ci/go.mod b/ci/go.mod index 1af6432a..e835717e 100644 --- a/ci/go.mod +++ b/ci/go.mod @@ -3,7 +3,7 @@ module dagger/interlink go 1.24.0 require ( - github.com/99designs/gqlgen v0.17.80 + github.com/99designs/gqlgen v0.17.81 github.com/Khan/genqlient v0.8.1 github.com/vektah/gqlparser/v2 v2.5.30 go.opentelemetry.io/otel v1.38.0 @@ -17,7 +17,7 @@ require ( go.opentelemetry.io/otel/trace v1.38.0 go.opentelemetry.io/proto/otlp v1.8.0 golang.org/x/sync v0.17.0 - google.golang.org/grpc v1.75.1 + google.golang.org/grpc v1.76.0 ) require ( diff --git a/ci/go.sum b/ci/go.sum index d49b1da2..cff6a961 100644 --- a/ci/go.sum +++ b/ci/go.sum @@ -1,5 +1,5 @@ -github.com/99designs/gqlgen v0.17.80 h1:S64VF9SK+q3JjQbilgdrM0o4iFQgB54mVQ3QvXEO4Ek= -github.com/99designs/gqlgen v0.17.80/go.mod h1:vgNcZlLwemsUhYim4dC1pvFP5FX0pr2Y+uYUoHFb1ig= +github.com/99designs/gqlgen v0.17.81 h1:kCkN/xVyRb5rEQpuwOHRTYq83i0IuTQg9vdIiwEerTs= +github.com/99designs/gqlgen v0.17.81/go.mod h1:vgNcZlLwemsUhYim4dC1pvFP5FX0pr2Y+uYUoHFb1ig= github.com/Khan/genqlient v0.8.1 h1:wtOCc8N9rNynRLXN3k3CnfzheCUNKBcvXmVv5zt6WCs= github.com/Khan/genqlient v0.8.1/go.mod h1:R2G6DzjBvCbhjsEajfRjbWdVglSH/73kSivC9TLWVjU= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNgfBlViaCIJKLlCJ6/fmUseuG0wVQ= @@ -87,8 +87,8 @@ google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1: google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE= google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE= 
google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5/go.mod h1:M4/wBTSeyLxupu3W3tJtOgB14jILAS/XWPSSa3TAlJc= -google.golang.org/grpc v1.75.1 h1:/ODCNEuf9VghjgO3rqLcfg8fiOP0nSluljWFlDxELLI= -google.golang.org/grpc v1.75.1/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= +google.golang.org/grpc v1.76.0 h1:UnVkv1+uMLYXoIz6o7chp59WfQUYA2ex/BXQ9rHZu7A= +google.golang.org/grpc v1.76.0/go.mod h1:Ju12QI8M6iQJtbcsV+awF5a4hfJMLi4X0JLo94ULZ6c= google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw= google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/ci/main.go b/ci/main.go index b14539e9..bb215958 100644 --- a/ci/main.go +++ b/ci/main.go @@ -347,7 +347,7 @@ EOF`}). m.Kubectl = kubectl - dag.Container().From("alpine/helm:3.16.1"). + _, err = dag.Container().From("alpine/helm:3.16.1"). WithMountedFile("/.kube/config", m.KubeConfig). WithDirectory("/helm", helmChart). WithEnvVariable("BUST", time.Now().String()). @@ -364,6 +364,23 @@ EOF`}). "/helm/interlink", "--values", "/manifests/vk_helm_chart.yaml", }).Stdout(ctx) + if err != nil { + return nil, err + } + + _, err = m.Kubectl.WithExec([]string{"bash", "-c", ` +for i in {1..60}; do + if kubectl get node virtual-kubelet &>/dev/null; then + echo "Virtual-kubelet node found, waiting for Ready condition..." + kubectl wait --for=condition=Ready node/virtual-kubelet --timeout=240s && break + fi + echo "Waiting for virtual-kubelet node to be created... 
($i/60)" + sleep 5 +done +`}).Stdout(ctx) + if err != nil { + return nil, err + } return m, nil } @@ -916,18 +933,6 @@ func (m *Interlink) TestMTLS( // Automate CSR approval for testing - required for mTLS functionality and log access c = c.WithExec([]string{"bash", "-c", "kubectl get csr -o name | xargs -r kubectl certificate approve"}) - // Wait for virtual-kubelet node to be ready before running tests (robust version that waits for node creation) - c = c.WithExec([]string{"bash", "-c", ` -for i in {1..60}; do - if kubectl get node virtual-kubelet &>/dev/null; then - echo "Virtual-kubelet node found, waiting for Ready condition..." - kubectl wait --for=condition=Ready node/virtual-kubelet --timeout=240s && break - fi - echo "Waiting for virtual-kubelet node to be created... ($i/60)" - sleep 5 -done -`}) - // First run basic tests to ensure setup works result := c.WithExec([]string{"bash", "-c", "source .venv/bin/activate && export KUBECONFIG=/.kube/config && pytest -v -k 'hello'"}). // Automate CSR approval for testing - required for mTLS functionality @@ -950,8 +955,6 @@ spec: args: ["-c", "echo 'mTLS log test started'; sleep 30; echo 'mTLS log test completed'"] restartPolicy: Never EOF`}). - // Wait for pod to start - WithExec([]string{"bash", "-c", "kubectl wait --for=condition=PodReadyForStartup pod/mtls-log-test --timeout=120s || true"}). // Ensure CSR approval before testing logs WithExec([]string{"bash", "-c", "kubectl get csr -o name | xargs -r kubectl certificate approve"}). // Test getLogs endpoint specifically - this should work with mTLS now From 8ad14d92ea82caadc58df0897116cc4cf2b17fe0 Mon Sep 17 00:00:00 2001 From: Diego Ciangottini Date: Thu, 6 Nov 2025 16:55:34 +0000 Subject: [PATCH 8/9] print describe pod vk --- ci/main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/main.go b/ci/main.go index bb215958..fb5ce11f 100644 --- a/ci/main.go +++ b/ci/main.go @@ -370,6 +370,7 @@ EOF`}). 
_, err = m.Kubectl.WithExec([]string{"bash", "-c", ` for i in {1..60}; do + kubectl describe pod -n interlink if kubectl get node virtual-kubelet &>/dev/null; then echo "Virtual-kubelet node found, waiting for Ready condition..." kubectl wait --for=condition=Ready node/virtual-kubelet --timeout=240s && break From 79f8ee200947ea0e9018314c1fa079acae67e8fb Mon Sep 17 00:00:00 2001 From: Diego Ciangottini Date: Tue, 2 Dec 2025 15:15:23 +0100 Subject: [PATCH 9/9] use helm install --wait instead of polling for the virtual-kubelet node Signed-off-by: Diego Ciangottini --- ci/main.go | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/ci/main.go b/ci/main.go index fb5ce11f..b3e3c9b7 100644 --- a/ci/main.go +++ b/ci/main.go @@ -358,6 +358,8 @@ EOF`}). WithExec([]string{ "helm", "install", + "--wait", + "--timeout", "600s", "--create-namespace", "-n", "interlink", "virtual-node", @@ -368,21 +370,6 @@ EOF`}). return nil, err } - _, err = m.Kubectl.WithExec([]string{"bash", "-c", ` -for i in {1..60}; do - kubectl describe pod -n interlink - if kubectl get node virtual-kubelet &>/dev/null; then - echo "Virtual-kubelet node found, waiting for Ready condition..." - kubectl wait --for=condition=Ready node/virtual-kubelet --timeout=240s && break - fi - echo "Waiting for virtual-kubelet node to be created... ($i/60)" - sleep 5 -done -`}).Stdout(ctx) - if err != nil { - return nil, err - } - return m, nil }