From 94460937563f55f9a06fa190c09f1f46763708f7 Mon Sep 17 00:00:00 2001 From: tanzee Date: Sun, 15 Mar 2026 15:51:50 +0100 Subject: [PATCH 01/18] TEST32 Collaps Config CRD first pass --- go.mod | 10 ++--- go.sum | 24 +++++----- tests/component_test.go | 97 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+), 19 deletions(-) diff --git a/go.mod b/go.mod index bd650d52d..2f4abc112 100644 --- a/go.mod +++ b/go.mod @@ -279,7 +279,7 @@ require ( github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect github.com/grafana/pyroscope-go/godeltaprof v0.1.8 // indirect github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.5 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/hashicorp/go-getter v1.7.9 // indirect @@ -439,15 +439,15 @@ require ( go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect go.opentelemetry.io/contrib/instrumentation/runtime v0.64.0 // indirect go.opentelemetry.io/otel v1.41.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.16.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.38.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.39.0 // indirect go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 // indirect - go.opentelemetry.io/otel/log v0.15.0 // indirect + go.opentelemetry.io/otel/log v0.16.0 // indirect go.opentelemetry.io/otel/metric v1.41.0 // indirect go.opentelemetry.io/otel/sdk v1.41.0 // indirect - go.opentelemetry.io/otel/sdk/log v0.15.0 // indirect + go.opentelemetry.io/otel/sdk/log v0.16.0 // indirect go.opentelemetry.io/otel/sdk/metric v1.41.0 // indirect go.opentelemetry.io/otel/trace v1.41.0 // indirect go.opentelemetry.io/proto/otlp v1.9.0 // indirect @@ -498,4 +498,4 @@ require ( replace github.com/inspektor-gadget/inspektor-gadget => github.com/matthyx/inspektor-gadget v0.0.0-20260226175242-c524fbad47d9 -replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260311143456-a042ebaa0ec9 +replace github.com/kubescape/storage => /home/tanzee/gitrepos/vibe/storage diff --git a/go.sum b/go.sum index 147dfa269..8110b8d7a 100644 --- a/go.sum +++ b/go.sum @@ -1438,8 +1438,8 @@ github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpg github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= github.com/grpc-ecosystem/grpc-gateway/v2 v2.11.3/go.mod h1:o//XUCC/F+yRGJoPO/VU0GSB0f8Nhgmxx0VIRUvaC0w= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.5 h1:jP1RStw811EvUDzsUQ9oESqw2e4RqCjSAD9qIL8eMns= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.5/go.mod h1:WXNBZ64q3+ZUemCMXD9kYnr56H7CgZxDBHCVwstfl3s= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 h1:X+2YciYSxvMQK0UZ7sg45ZVabVZBeBuvMkmuI2V3Fak= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7/go.mod h1:lW34nIZuQ8UDPdkon5fmfp2l3+ZkQ2me/+oecHYLOII= github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M= github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -1590,8 +1590,6 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= -github.com/k8sstormcenter/storage v0.0.240-0.20260311143456-a042ebaa0ec9 h1:wGXVzdIKvGwUu1KXvUUsCxR1WWC6vzjwLhH2Jq9UuPE= -github.com/k8sstormcenter/storage v0.0.240-0.20260311143456-a042ebaa0ec9/go.mod h1:huYJIFh7TUAlV0W3+cmOh7KoJnWRcbWtGw0kY9YIrjU= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 h1:WdAeg/imY2JFPc/9CST4bZ80nNJbiBFCAdSZCSgrS5Y= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953/go.mod h1:6o+UrvuZWc4UTyBhQf0LGjW9Ld7qJxLz/OqvSOWWlEc= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= @@ -1631,8 +1629,6 @@ github.com/kubescape/go-logger v0.0.24 h1:JRNlblY16Ty7hD6MSYNPvWYDxNzVAufsDDX/sZ github.com/kubescape/go-logger v0.0.24/go.mod h1:sMPVCr3VpW/e+SeMaXig5kClGvmZbDXN8YktUeNU4nY= github.com/kubescape/k8s-interface v0.0.204 h1:YkphM8aozocUazKpp0H37By/KZjUjnKeoYqP1b7uBWk= github.com/kubescape/k8s-interface v0.0.204/go.mod h1:d4NVhL81bVXe8yEXlkT4ZHrt3iEppEIN39b8N1oXm5s= -github.com/kubescape/storage v0.0.247 h1:Xf0ScExy7oT/NrZz9732tX/9V3/xudtIeHWKlNxXdxc= -github.com/kubescape/storage v0.0.247/go.mod h1:huYJIFh7TUAlV0W3+cmOh7KoJnWRcbWtGw0kY9YIrjU= github.com/kubescape/workerpool v0.0.0-20250526074519-0e4a4e7f44cf h1:hI0jVwrB6fT4GJWvuUjzObfci1CUknrZdRHfnRVtKM0= github.com/kubescape/workerpool v0.0.0-20250526074519-0e4a4e7f44cf/go.mod h1:Il5baM40PV9cTt4OGdLMeTRRAai3TMfvImu31itIeCM= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= @@ -2235,8 +2231,8 @@ go.opentelemetry.io/contrib/instrumentation/runtime v0.64.0 h1:/+/+UjlXjFcdDlXxK go.opentelemetry.io/contrib/instrumentation/runtime v0.64.0/go.mod h1:Ldm/PDuzY2DP7IypudopCR3OCOW42NJlN9+mNEroevo= go.opentelemetry.io/otel v1.41.0 h1:YlEwVsGAlCvczDILpUXpIpPSL/VPugt7zHThEMLce1c= go.opentelemetry.io/otel v1.41.0/go.mod h1:Yt4UwgEKeT05QbLwbyHXEwhnjxNO6D8L5PQP51/46dE= -go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0 h1:QQqYw3lkrzwVsoEX0w//EhH/TCnpRdEenKBOOEIMjWc= -go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0/go.mod h1:gSVQcr17jk2ig4jqJ2DX30IdWH251JcNAecvrqTxH1s= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.16.0 h1:djrxvDxAe44mJUrKataUbOhCKhR3F8QCyWucO16hTQs= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.16.0/go.mod h1:dt3nxpQEiSoKvfTVxp3TUg5fHPLhKtbcnN3Z1I1ePD0= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.38.0 h1:Oe2z/BCg5q7k4iXC3cqJxKYg0ieRiOqF0cecFYdPTwk= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.38.0/go.mod h1:ZQM5lAJpOsKnYagGg/zV2krVqTtaVdYdDkhMoX6Oalg= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 h1:f0cb2XPmrqn4XMy9PNliTgRKJgS5WcL/u0/WRYGz4t0= @@ -2247,16 +2243,16 @@ go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.38.0 h1:wm/Q0GAAykXv83 go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.38.0/go.mod h1:ra3Pa40+oKjvYh+ZD3EdxFZZB0xdMfuileHAm4nNN7w= go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 h1:kJxSDN4SgWWTjG/hPp3O7LCGLcHXFlvS2/FFOrwL+SE= go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0/go.mod h1:mgIOzS7iZeKJdeB8/NYHrJ48fdGc71Llo5bJ1J4DWUE= -go.opentelemetry.io/otel/log v0.15.0 h1:0VqVnc3MgyYd7QqNVIldC3dsLFKgazR6P3P3+ypkyDY= -go.opentelemetry.io/otel/log v0.15.0/go.mod h1:9c/G1zbyZfgu1HmQD7Qj84QMmwTp2QCQsZH1aeoWDE4= +go.opentelemetry.io/otel/log v0.16.0 h1:DeuBPqCi6pQwtCK0pO4fvMB5eBq6sNxEnuTs88pjsN4= +go.opentelemetry.io/otel/log v0.16.0/go.mod h1:rWsmqNVTLIA8UnwYVOItjyEZDbKIkMxdQunsIhpUMes= go.opentelemetry.io/otel/metric v1.41.0 h1:rFnDcs4gRzBcsO9tS8LCpgR0dxg4aaxWlJxCno7JlTQ= go.opentelemetry.io/otel/metric v1.41.0/go.mod h1:xPvCwd9pU0VN8tPZYzDZV/BMj9CM9vs00GuBjeKhJps= go.opentelemetry.io/otel/sdk v1.41.0 h1:YPIEXKmiAwkGl3Gu1huk1aYWwtpRLeskpV+wPisxBp8= go.opentelemetry.io/otel/sdk v1.41.0/go.mod h1:ahFdU0G5y8IxglBf0QBJXgSe7agzjE4GiTJ6HT9ud90= -go.opentelemetry.io/otel/sdk/log v0.15.0 h1:WgMEHOUt5gjJE93yqfqJOkRflApNif84kxoHWS9VVHE= -go.opentelemetry.io/otel/sdk/log v0.15.0/go.mod h1:qDC/FlKQCXfH5hokGsNg9aUBGMJQsrUyeOiW5u+dKBQ= -go.opentelemetry.io/otel/sdk/log/logtest v0.14.0 h1:Ijbtz+JKXl8T2MngiwqBlPaHqc4YCaP/i13Qrow6gAM= -go.opentelemetry.io/otel/sdk/log/logtest v0.14.0/go.mod h1:dCU8aEL6q+L9cYTqcVOk8rM9Tp8WdnHOPLiBgp0SGOA= +go.opentelemetry.io/otel/sdk/log v0.16.0 h1:e/b4bdlQwC5fnGtG3dlXUrNOnP7c8YLVSpSfEBIkTnI= +go.opentelemetry.io/otel/sdk/log v0.16.0/go.mod h1:JKfP3T6ycy7QEuv3Hj8oKDy7KItrEkus8XJE6EoSzw4= +go.opentelemetry.io/otel/sdk/log/logtest v0.16.0 h1:/XVkpZ41rVRTP4DfMgYv1nEtNmf65XPPyAdqV90TMy4= +go.opentelemetry.io/otel/sdk/log/logtest v0.16.0/go.mod h1:iOOPgQr5MY9oac/F5W86mXdeyWZGleIx3uXO98X2R6Y= go.opentelemetry.io/otel/sdk/metric v1.41.0 h1:siZQIYBAUd1rlIWQT2uCxWJxcCO7q3TriaMlf08rXw8= go.opentelemetry.io/otel/sdk/metric v1.41.0/go.mod h1:HNBuSvT7ROaGtGI50ArdRLUnvRTRGniSUZbxiWxSO8Y= go.opentelemetry.io/otel/trace v1.41.0 h1:Vbk2co6bhj8L59ZJ6/xFTskY+tGAbOnCtQGVVa9TIN0= diff --git a/tests/component_test.go b/tests/component_test.go index 9c0f8e14a..0587c2b2c 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -31,7 +31,10 @@ import ( "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/dynamic" "k8s.io/utils/ptr" ) @@ -3147,3 +3150,97 @@ func Test_31_TamperDetectionAlert(t *testing.T) { "R1016 must fire — proves tamper detection alerting works") t.Log("Tamper detection alerting verified successfully") } + +func Test_32_CollapseConfigurationCRD(t *testing.T) { + k8sClient := k8sinterface.NewKubernetesApi() + dynClient, err := dynamic.NewForConfig(k8sClient.K8SConfig) + require.NoError(t, err, "create dynamic client") + + gvr := schema.GroupVersionResource{ + Group: "spdx.softwarecomposition.kubescape.io", + Version: "v1beta1", + Resource: "collapseconfigurations", + } + ctx := context.Background() + name := fmt.Sprintf("test-collapse-%d", time.Now().UnixNano()%10000) + + // Clean up after test + defer func() { + _ = dynClient.Resource(gvr).Delete(ctx, name, metav1.DeleteOptions{}) + }() + + // ── 1. Create a valid CollapseConfiguration ── + obj := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "spdx.softwarecomposition.kubescape.io/v1beta1", + "kind": "CollapseConfiguration", + "metadata": map[string]interface{}{ + "name": name, + }, + "spec": map[string]interface{}{ + "openDynamicThreshold": 50, + "endpointDynamicThreshold": 100, + "collapseConfigs": []interface{}{ + map[string]interface{}{ + "prefix": "/etc", + "threshold": 10, + }, + map[string]interface{}{ + "prefix": "/var/log", + "threshold": 20, + }, + }, + }, + }, + } + + created, err := dynClient.Resource(gvr).Create(ctx, obj, metav1.CreateOptions{}) + require.NoError(t, err, "create CollapseConfiguration") + t.Logf("Created CollapseConfiguration %q", created.GetName()) + + // ── 2. Get and verify ── + got, err := dynClient.Resource(gvr).Get(ctx, name, metav1.GetOptions{}) + require.NoError(t, err, "get CollapseConfiguration") + + spec := got.Object["spec"].(map[string]interface{}) + assert.EqualValues(t, 50, spec["openDynamicThreshold"]) + assert.EqualValues(t, 100, spec["endpointDynamicThreshold"]) + + configs := spec["collapseConfigs"].([]interface{}) + require.Len(t, configs, 2) + assert.Equal(t, "/etc", configs[0].(map[string]interface{})["prefix"]) + assert.EqualValues(t, 10, configs[0].(map[string]interface{})["threshold"]) + t.Log("Get verified — spec matches") + + // ── 3. List ── + list, err := dynClient.Resource(gvr).List(ctx, metav1.ListOptions{}) + require.NoError(t, err, "list CollapseConfigurations") + found := false + for _, item := range list.Items { + if item.GetName() == name { + found = true + break + } + } + require.True(t, found, "CollapseConfiguration must appear in list") + t.Log("List verified") + + // ── 4. Update ── + got.Object["spec"].(map[string]interface{})["openDynamicThreshold"] = int64(75) + _, err = dynClient.Resource(gvr).Update(ctx, got, metav1.UpdateOptions{}) + require.NoError(t, err, "update CollapseConfiguration") + // storage returns metadata-only in write responses; re-Get to verify + got2, err := dynClient.Resource(gvr).Get(ctx, name, metav1.GetOptions{}) + require.NoError(t, err, "get after update") + updatedSpec := got2.Object["spec"].(map[string]interface{}) + assert.EqualValues(t, 75, updatedSpec["openDynamicThreshold"]) + t.Log("Update verified") + + // ── 5. Delete ── + err = dynClient.Resource(gvr).Delete(ctx, name, metav1.DeleteOptions{}) + require.NoError(t, err, "delete CollapseConfiguration") + + _, err = dynClient.Resource(gvr).Get(ctx, name, metav1.GetOptions{}) + require.Error(t, err, "get after delete must fail") + t.Log("Delete verified — CollapseConfiguration CRD CRUD works end-to-end") +} From e91642fa892b6b83a3f0bfc0968ecb6496129b64 Mon Sep 17 00:00:00 2001 From: tanzee Date: Sun, 15 Mar 2026 15:53:17 +0100 Subject: [PATCH 02/18] TEST32 Collaps Config CRD first pass --- .github/workflows/component-tests.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/component-tests.yaml b/.github/workflows/component-tests.yaml index 163b6c50f..ff38bf2c4 100644 --- a/.github/workflows/component-tests.yaml +++ b/.github/workflows/component-tests.yaml @@ -205,7 +205,8 @@ jobs: Test_28_UserDefinedNetworkNeighborhood, Test_29_SignedApplicationProfile, Test_30_TamperedSignedProfiles, - Test_31_TamperDetectionAlert + Test_31_TamperDetectionAlert, + Test_32_CollapseConfigurationCRD ] steps: - name: Checkout code From 6921fa2d50e8a8fc9f4ee37facfdc0af64a5b02f Mon Sep 17 00:00:00 2001 From: tanzee Date: Sun, 15 Mar 2026 16:49:39 +0100 Subject: [PATCH 03/18] TEST32 Collaps Config CRD second pass --- tests/component_test.go | 331 +++++++++++++++++++++++++++++++--------- 1 file changed, 260 insertions(+), 71 deletions(-) diff --git a/tests/component_test.go b/tests/component_test.go index 0587c2b2c..57740a8d4 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -3156,91 +3156,280 @@ func Test_32_CollapseConfigurationCRD(t *testing.T) { dynClient, err := dynamic.NewForConfig(k8sClient.K8SConfig) require.NoError(t, err, "create dynamic client") - gvr := schema.GroupVersionResource{ + collapseGVR := schema.GroupVersionResource{ Group: "spdx.softwarecomposition.kubescape.io", Version: "v1beta1", Resource: "collapseconfigurations", } ctx := context.Background() - name := fmt.Sprintf("test-collapse-%d", time.Now().UnixNano()%10000) - // Clean up after test - defer func() { - _ = dynClient.Resource(gvr).Delete(ctx, name, metav1.DeleteOptions{}) - }() - - // ── 1. Create a valid CollapseConfiguration ── - obj := &unstructured.Unstructured{ - Object: map[string]interface{}{ - "apiVersion": "spdx.softwarecomposition.kubescape.io/v1beta1", - "kind": "CollapseConfiguration", - "metadata": map[string]interface{}{ - "name": name, - }, - "spec": map[string]interface{}{ - "openDynamicThreshold": 50, - "endpointDynamicThreshold": 100, - "collapseConfigs": []interface{}{ - map[string]interface{}{ - "prefix": "/etc", - "threshold": 10, + t.Run("CRUD", func(t *testing.T) { + name := fmt.Sprintf("test-collapse-%d", time.Now().UnixNano()%10000) + defer func() { + _ = dynClient.Resource(collapseGVR).Delete(ctx, name, metav1.DeleteOptions{}) + }() + + obj := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "spdx.softwarecomposition.kubescape.io/v1beta1", + "kind": "CollapseConfiguration", + "metadata": map[string]interface{}{"name": name}, + "spec": map[string]interface{}{ + "openDynamicThreshold": 50, + "endpointDynamicThreshold": 100, + "collapseConfigs": []interface{}{ + map[string]interface{}{"prefix": "/etc", "threshold": 10}, + map[string]interface{}{"prefix": "/var/log", "threshold": 20}, }, - map[string]interface{}{ - "prefix": "/var/log", - "threshold": 20, + }, + }, + } + + _, err := dynClient.Resource(collapseGVR).Create(ctx, obj, metav1.CreateOptions{}) + require.NoError(t, err, "create CollapseConfiguration") + + got, err := dynClient.Resource(collapseGVR).Get(ctx, name, metav1.GetOptions{}) + require.NoError(t, err, "get CollapseConfiguration") + spec := got.Object["spec"].(map[string]interface{}) + assert.EqualValues(t, 50, spec["openDynamicThreshold"]) + assert.EqualValues(t, 100, spec["endpointDynamicThreshold"]) + + list, err := dynClient.Resource(collapseGVR).List(ctx, metav1.ListOptions{}) + require.NoError(t, err, "list") + found := false + for _, item := range list.Items { + if item.GetName() == name { + found = true + } + } + require.True(t, found) + + got.Object["spec"].(map[string]interface{})["openDynamicThreshold"] = int64(75) + _, err = dynClient.Resource(collapseGVR).Update(ctx, got, metav1.UpdateOptions{}) + require.NoError(t, err, "update") + got2, err := dynClient.Resource(collapseGVR).Get(ctx, name, metav1.GetOptions{}) + require.NoError(t, err) + assert.EqualValues(t, 75, got2.Object["spec"].(map[string]interface{})["openDynamicThreshold"]) + + require.NoError(t, dynClient.Resource(collapseGVR).Delete(ctx, name, metav1.DeleteOptions{})) + _, err = dynClient.Resource(collapseGVR).Get(ctx, name, metav1.GetOptions{}) + require.Error(t, err) + t.Log("CRUD verified") + }) + + t.Run("CollapseAffectsLearnedProfile", func(t *testing.T) { + // ── 1. Create CollapseConfiguration with aggressive thresholds ── + // Nginx opens many files under /etc and /lib; with low thresholds these should collapse. + ccName := "default" // storage looks for "default" first + defer func() { + _ = dynClient.Resource(collapseGVR).Delete(ctx, ccName, metav1.DeleteOptions{}) + }() + + cc := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "spdx.softwarecomposition.kubescape.io/v1beta1", + "kind": "CollapseConfiguration", + "metadata": map[string]interface{}{"name": ccName}, + "spec": map[string]interface{}{ + "openDynamicThreshold": int64(50), + "endpointDynamicThreshold": int64(100), + "collapseConfigs": []interface{}{ + map[string]interface{}{"prefix": "/etc", "threshold": int64(3)}, + map[string]interface{}{"prefix": "/usr", "threshold": int64(5)}, + map[string]interface{}{"prefix": "/usr/bin", "threshold": int64(3)}, + map[string]interface{}{"prefix": "/lib", "threshold": int64(5)}, }, }, }, - }, - } + } + _, err := dynClient.Resource(collapseGVR).Create(ctx, cc, metav1.CreateOptions{}) + require.NoError(t, err, "create CollapseConfiguration") + t.Log("Created CollapseConfiguration 'default' with aggressive thresholds") - created, err := dynClient.Resource(gvr).Create(ctx, obj, metav1.CreateOptions{}) - require.NoError(t, err, "create CollapseConfiguration") - t.Logf("Created CollapseConfiguration %q", created.GetName()) + // Verify it was stored + got, err := dynClient.Resource(collapseGVR).Get(ctx, ccName, metav1.GetOptions{}) + require.NoError(t, err) + t.Logf("CollapseConfig spec: %v", got.Object["spec"]) - // ── 2. Get and verify ── - got, err := dynClient.Resource(gvr).Get(ctx, name, metav1.GetOptions{}) - require.NoError(t, err, "get CollapseConfiguration") + // ── 2. Deploy nginx and wait for AP to complete ── + ns := testutils.NewRandomNamespace() + t.Logf("Using namespace %s", ns.Name) + wl, err := testutils.NewTestWorkload(ns.Name, path.Join(utils.CurrentDir(), "resources/nginx-deployment.yaml")) + require.NoError(t, err, "create nginx workload") + require.NoError(t, wl.WaitForReady(30), "wait for nginx ready") - spec := got.Object["spec"].(map[string]interface{}) - assert.EqualValues(t, 50, spec["openDynamicThreshold"]) - assert.EqualValues(t, 100, spec["endpointDynamicThreshold"]) + // Give it a moment to generate file opens + time.Sleep(10 * time.Second) - configs := spec["collapseConfigs"].([]interface{}) - require.Len(t, configs, 2) - assert.Equal(t, "/etc", configs[0].(map[string]interface{})["prefix"]) - assert.EqualValues(t, 10, configs[0].(map[string]interface{})["threshold"]) - t.Log("Get verified — spec matches") + // ── 3. Wait for AP completion ── + err = wl.WaitForApplicationProfileCompletion(30) // 30 retries × 10s = 5 min max + require.NoError(t, err, "wait for AP completion") - // ── 3. List ── - list, err := dynClient.Resource(gvr).List(ctx, metav1.ListOptions{}) - require.NoError(t, err, "list CollapseConfigurations") - found := false - for _, item := range list.Items { - if item.GetName() == name { - found = true - break + // ── 4. Get the AP and inspect Opens ── + ap, err := wl.GetApplicationProfile() + require.NoError(t, err, "get application profile") + require.NotEmpty(t, ap.Spec.Containers, "AP must have containers") + + for _, container := range ap.Spec.Containers { + t.Logf("Container %q: %d opens", container.Name, len(container.Opens)) + var collapsedEtc, collapsedLib, collapsedUsr bool + for _, o := range container.Opens { + t.Logf(" open: %s (flags: %v)", o.Path, o.Flags) + // Check for collapse markers (⋯ or *) in paths + if (strings.HasPrefix(o.Path, "/etc/") && strings.Contains(o.Path, "⋯")) || + (strings.HasPrefix(o.Path, "/etc/") && strings.Contains(o.Path, "*")) { + collapsedEtc = true + } + if (strings.HasPrefix(o.Path, "/lib") && strings.Contains(o.Path, "⋯")) || + (strings.HasPrefix(o.Path, "/lib") && strings.Contains(o.Path, "*")) { + collapsedLib = true + } + if (strings.HasPrefix(o.Path, "/usr/") && strings.Contains(o.Path, "⋯")) || + (strings.HasPrefix(o.Path, "/usr/") && strings.Contains(o.Path, "*")) { + collapsedUsr = true + } + } + // With threshold=3 for /etc, nginx should collapse (it opens >3 files under /etc) + t.Logf("Collapse detected: /etc=%v /lib=%v /usr=%v", collapsedEtc, collapsedLib, collapsedUsr) + assert.True(t, collapsedEtc, "expected /etc paths to be collapsed with threshold=3") } - } - require.True(t, found, "CollapseConfiguration must appear in list") - t.Log("List verified") - - // ── 4. Update ── - got.Object["spec"].(map[string]interface{})["openDynamicThreshold"] = int64(75) - _, err = dynClient.Resource(gvr).Update(ctx, got, metav1.UpdateOptions{}) - require.NoError(t, err, "update CollapseConfiguration") - // storage returns metadata-only in write responses; re-Get to verify - got2, err := dynClient.Resource(gvr).Get(ctx, name, metav1.GetOptions{}) - require.NoError(t, err, "get after update") - updatedSpec := got2.Object["spec"].(map[string]interface{}) - assert.EqualValues(t, 75, updatedSpec["openDynamicThreshold"]) - t.Log("Update verified") - - // ── 5. Delete ── - err = dynClient.Resource(gvr).Delete(ctx, name, metav1.DeleteOptions{}) - require.NoError(t, err, "delete CollapseConfiguration") - - _, err = dynClient.Resource(gvr).Get(ctx, name, metav1.GetOptions{}) - require.Error(t, err, "get after delete must fail") - t.Log("Delete verified — CollapseConfiguration CRD CRUD works end-to-end") + t.Log("Phase 1 verified — CollapseConfiguration affects AP collapsing") + + // ── 5. Update CollapseConfiguration with higher thresholds ── + got, err = dynClient.Resource(collapseGVR).Get(ctx, ccName, metav1.GetOptions{}) + require.NoError(t, err) + got.Object["spec"] = map[string]interface{}{ + "openDynamicThreshold": int64(50), + "endpointDynamicThreshold": int64(100), + "collapseConfigs": []interface{}{ + map[string]interface{}{"prefix": "/etc", "threshold": int64(500)}, + map[string]interface{}{"prefix": "/usr", "threshold": int64(500)}, + map[string]interface{}{"prefix": "/lib", "threshold": int64(500)}, + }, + } + _, err = dynClient.Resource(collapseGVR).Update(ctx, got, metav1.UpdateOptions{}) + require.NoError(t, err, "update CollapseConfig with high thresholds") + t.Log("Updated CollapseConfiguration — thresholds raised to 500") + + // ── 6. Deploy nginx in a fresh namespace ── + ns2 := testutils.NewRandomNamespace() + t.Logf("Using namespace %s for phase 2", ns2.Name) + wl2, err := testutils.NewTestWorkload(ns2.Name, path.Join(utils.CurrentDir(), "resources/nginx-deployment.yaml")) + require.NoError(t, err, "create nginx workload (phase 2)") + require.NoError(t, wl2.WaitForReady(30), "wait for nginx ready (phase 2)") + + time.Sleep(10 * time.Second) + + err = wl2.WaitForApplicationProfileCompletion(30) + require.NoError(t, err, "wait for AP completion (phase 2)") + + // ── 7. Verify paths are NOT collapsed with high thresholds ── + ap2, err := wl2.GetApplicationProfile() + require.NoError(t, err, "get AP (phase 2)") + require.NotEmpty(t, ap2.Spec.Containers) + + for _, container := range ap2.Spec.Containers { + t.Logf("Phase 2 — Container %q: %d opens", container.Name, len(container.Opens)) + var collapsedEtc2 bool + for _, o := range container.Opens { + t.Logf(" open: %s (flags: %v)", o.Path, o.Flags) + if strings.HasPrefix(o.Path, "/etc/") && + (strings.Contains(o.Path, "⋯") || strings.Contains(o.Path, "*")) { + collapsedEtc2 = true + } + } + // With threshold=500, nginx should NOT collapse /etc (it opens far fewer than 500 files) + t.Logf("Phase 2 collapse detected: /etc=%v", collapsedEtc2) + assert.False(t, collapsedEtc2, "expected /etc paths NOT collapsed with threshold=500") + } + t.Log("Phase 2 verified — higher thresholds prevent collapsing") + }) + + t.Run("StressTest200Entries", func(t *testing.T) { + // Create a CollapseConfiguration with 200 entries at various depths + ccName := "default" + defer func() { + _ = dynClient.Resource(collapseGVR).Delete(ctx, ccName, metav1.DeleteOptions{}) + }() + + // Generate 200 entries with diverse prefixes and depths + entries := make([]interface{}, 200) + prefixes := []string{ + "/etc", "/usr", "/usr/bin", "/usr/lib", "/usr/share", "/usr/local", + "/var", "/var/log", "/var/run", "/var/lib", "/var/cache", + "/lib", "/lib64", "/opt", "/home", "/tmp", "/run", "/sys", "/proc", "/dev", + "/srv", "/mnt", "/boot", "/root", "/sbin", "/bin", + } + for i := 0; i < 200; i++ { + base := prefixes[i%len(prefixes)] + depth := i / len(prefixes) // 0-7 depending on iteration + p := base + for d := 0; d < depth; d++ { + p = fmt.Sprintf("%s/sub%d", p, d) + } + threshold := (i%20)*5 + 1 // varied thresholds: 1, 6, 11, ..., 96 + entries[i] = map[string]interface{}{ + "prefix": p, + "threshold": int64(threshold), + } + } + + cc := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "spdx.softwarecomposition.kubescape.io/v1beta1", + "kind": "CollapseConfiguration", + "metadata": map[string]interface{}{"name": ccName}, + "spec": map[string]interface{}{ + "openDynamicThreshold": int64(50), + "endpointDynamicThreshold": int64(100), + "collapseConfigs": entries, + }, + }, + } + + _, err := dynClient.Resource(collapseGVR).Create(ctx, cc, metav1.CreateOptions{}) + require.NoError(t, err, "create CollapseConfiguration with 200 entries") + + // Verify storage accepted it + got, err := dynClient.Resource(collapseGVR).Get(ctx, ccName, metav1.GetOptions{}) + require.NoError(t, err, "get CollapseConfiguration") + storedConfigs := got.Object["spec"].(map[string]interface{})["collapseConfigs"].([]interface{}) + require.Len(t, storedConfigs, 200, "all 200 entries must be stored") + t.Logf("Stored 200 entries, first: %v, last: %v", storedConfigs[0], storedConfigs[199]) + + // Deploy nginx and learn AP with the 200-entry config + ns := testutils.NewRandomNamespace() + t.Logf("Using namespace %s", ns.Name) + wl, err := testutils.NewTestWorkload(ns.Name, path.Join(utils.CurrentDir(), "resources/nginx-deployment.yaml")) + require.NoError(t, err, "create nginx workload") + require.NoError(t, wl.WaitForReady(30)) + + time.Sleep(10 * time.Second) + + err = wl.WaitForApplicationProfileCompletion(30) + require.NoError(t, err, "wait for AP completion with 200-entry config") + + ap, err := wl.GetApplicationProfile() + require.NoError(t, err, "get AP") + require.NotEmpty(t, ap.Spec.Containers) + + for _, container := range ap.Spec.Containers { + t.Logf("200-entry test — Container %q: %d opens", container.Name, len(container.Opens)) + for _, o := range container.Opens { + t.Logf(" open: %s (flags: %v)", o.Path, o.Flags) + } + // With 200 entries, the /etc entry has threshold=1 (i=0, (0%20)*5+1=1) + // meaning every unique child under /etc should collapse immediately + var collapsedEtc bool + for _, o := range container.Opens { + if strings.HasPrefix(o.Path, "/etc/") && + (strings.Contains(o.Path, "⋯") || strings.Contains(o.Path, "*")) { + collapsedEtc = true + } + } + // /etc has threshold=1, so any child should be collapsed + assert.True(t, collapsedEtc, "/etc should collapse with threshold=1") + } + t.Log("200-entry stress test verified — storage handles large CollapseConfig correctly") + }) } From 0123b32033e045363affe493e82b4dd24719a25a Mon Sep 17 00:00:00 2001 From: tanzee Date: Sun, 15 Mar 2026 17:15:42 +0100 Subject: [PATCH 04/18] Test 32: Removeing undeflated Opens --- tests/component_test.go | 82 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/tests/component_test.go b/tests/component_test.go index 57740a8d4..46c0e3f76 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -3345,6 +3345,88 @@ func Test_32_CollapseConfigurationCRD(t *testing.T) { t.Log("Phase 2 verified — higher thresholds prevent collapsing") }) + t.Run("ConsolidationRemovesSubsumedPaths", func(t *testing.T) { + // After collapsing, individual paths that are subsumed by a wildcard/dynamic + // pattern (e.g. /etc/⋯) should be removed from the profile. This test + // verifies that consolidateOpens works end-to-end. + ccName := "default" + defer func() { + _ = dynClient.Resource(collapseGVR).Delete(ctx, ccName, metav1.DeleteOptions{}) + }() + + // Very low threshold for /etc to trigger collapse + cc := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "spdx.softwarecomposition.kubescape.io/v1beta1", + "kind": "CollapseConfiguration", + "metadata": map[string]interface{}{"name": ccName}, + "spec": map[string]interface{}{ + "openDynamicThreshold": int64(50), + "endpointDynamicThreshold": int64(100), + "collapseConfigs": []interface{}{ + map[string]interface{}{"prefix": "/etc", "threshold": int64(3)}, + map[string]interface{}{"prefix": "/lib", "threshold": int64(3)}, + map[string]interface{}{"prefix": "/usr", "threshold": int64(3)}, + }, + }, + }, + } + _, err := dynClient.Resource(collapseGVR).Create(ctx, cc, metav1.CreateOptions{}) + require.NoError(t, err, "create CollapseConfiguration") + t.Log("Created CollapseConfiguration with threshold=3 for /etc, /lib, /usr") + + // Deploy nginx + ns := testutils.NewRandomNamespace() + t.Logf("Using namespace %s", ns.Name) + wl, err := testutils.NewTestWorkload(ns.Name, path.Join(utils.CurrentDir(), "resources/nginx-deployment.yaml")) + require.NoError(t, err, "create nginx workload") + require.NoError(t, wl.WaitForReady(30), "wait for nginx ready") + + time.Sleep(10 * time.Second) + + err = wl.WaitForApplicationProfileCompletion(30) + require.NoError(t, err, "wait for AP completion") + + ap, err := wl.GetApplicationProfile() + require.NoError(t, err, "get application profile") + require.NotEmpty(t, ap.Spec.Containers, "AP must have containers") + + for _, container := range ap.Spec.Containers { + t.Logf("Container %q: %d opens", container.Name, len(container.Opens)) + + hasDynamicEtc := false + singleSegmentEtcPaths := 0 // paths like /etc/hosts, /etc/passwd (no ⋯ or *) + + for _, o := range container.Opens { + t.Logf(" open: %s (flags: %v)", o.Path, o.Flags) + + if o.Path == "/etc/\u22ef" || o.Path == "/etc/*" { + hasDynamicEtc = true + } + + // Count individual /etc/X paths (single segment after /etc/, no wildcards) + if strings.HasPrefix(o.Path, "/etc/") && + !strings.Contains(o.Path, "\u22ef") && + !strings.Contains(o.Path, "*") { + parts := strings.Split(strings.TrimPrefix(o.Path, "/etc/"), "/") + if len(parts) == 1 { + singleSegmentEtcPaths++ + t.Logf(" *** single-segment /etc path NOT consolidated: %s", o.Path) + } + } + } + + // With threshold=3 and nginx opening many /etc files, we expect /etc/⋯ + assert.True(t, hasDynamicEtc, "expected /etc/⋯ pattern from collapse") + + // The consolidation should have removed individual single-segment /etc paths + // because they are subsumed by /etc/⋯ + assert.Equal(t, 0, singleSegmentEtcPaths, + "individual single-segment /etc/ paths should be consolidated away by /etc/⋯") + } + t.Log("Consolidation verified — subsumed paths removed from profile") + }) + t.Run("StressTest200Entries", func(t *testing.T) { // Create a CollapseConfiguration with 200 entries at various depths ccName := "default" From c9737e19be67ed6eea8ab862e40333070ee9acc9 Mon Sep 17 00:00:00 2001 From: entlein Date: Sun, 15 Mar 2026 19:45:42 +0100 Subject: [PATCH 05/18] adding test Signed-off-by: entlein --- go.mod | 2 - .../templates/node-agent/default-rules.yaml | 2 +- tests/component_test.go | 64 +++++++++++++++++++ 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 2f4abc112..4eda35231 100644 --- a/go.mod +++ b/go.mod @@ -497,5 +497,3 @@ require ( ) replace github.com/inspektor-gadget/inspektor-gadget => github.com/matthyx/inspektor-gadget v0.0.0-20260226175242-c524fbad47d9 - -replace github.com/kubescape/storage => /home/tanzee/gitrepos/vibe/storage diff --git a/tests/chart/templates/node-agent/default-rules.yaml b/tests/chart/templates/node-agent/default-rules.yaml index 4b245d321..e24ec84b5 100644 --- a/tests/chart/templates/node-agent/default-rules.yaml +++ b/tests/chart/templates/node-agent/default-rules.yaml @@ -122,7 +122,7 @@ spec: uniqueId: "event.comm + '_' + event.name" ruleExpression: - eventType: "dns" - expression: "!event.name.endsWith('.svc.cluster.local.') && !nn.is_domain_in_egress(event.containerId, event.name)" + expression: "!nn.is_domain_in_egress(event.containerId, event.name)" profileDependency: 0 severity: 1 supportPolicy: false diff --git a/tests/component_test.go b/tests/component_test.go index 46c0e3f76..832a8790f 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -2628,6 +2628,70 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { require.Greater(t, countByRule(alerts, "R0011"), 0, "DNS MITM: TCP to spoofed IP 128.130.194.56 must fire R0011") }) + + // --------------------------------------------------------------- + // 28g. Service discovery via PTR sweep. + // An attacker inside a pod sweeps the Kubernetes service + // CIDR with reverse DNS lookups (PTR queries) to discover + // service names. Each nslookup generates a PTR query + // for .in-addr.arpa. which is NOT in the NN. + // + // Expected: + // R0005 > 0 — PTR queries are not in NN egress. + // R0011 = 0 — DNS traffic stays on the private cluster + // DNS IP, filtered by is_private_ip(). + // + // Requires: R0005 without the .svc.cluster.local. exclusion + // (the NN is the sole whitelist). + // --------------------------------------------------------------- + t.Run("ptr_sweep_service_discovery", func(t *testing.T) { + wl := setup(t) + k8sClient := k8sinterface.NewKubernetesApi() + + // Discover the service CIDR base from the kubernetes service IP. + kubeSvc, err := k8sClient.KubernetesClient.CoreV1(). + Services("default").Get(context.Background(), "kubernetes", metav1.GetOptions{}) + require.NoError(t, err, "get kubernetes service") + clusterIP := kubeSvc.Spec.ClusterIP + t.Logf("kubernetes service clusterIP: %s", clusterIP) + + // Extract the /24 base (e.g. "10.96.0" from "10.96.0.1"). + parts := strings.Split(clusterIP, ".") + require.Len(t, parts, 4, "clusterIP must be IPv4") + base := strings.Join(parts[:3], ".") + + // Build a sweep command: reverse-lookup 20 IPs in the service CIDR. + // BusyBox nslookup does a PTR query when given an IP address. + // Each PTR query generates event.name = ".in-addr.arpa." + // which is NOT in the NN → R0005 fires. + var sweepCmd strings.Builder + for i := 1; i <= 20; i++ { + if i > 1 { + sweepCmd.WriteString("; ") + } + sweepCmd.WriteString(fmt.Sprintf("nslookup %s.%d 2>/dev/null || true", base, i)) + } + + stdout, stderr, err := wl.ExecIntoPod( + []string{"sh", "-c", sweepCmd.String()}, "curl") + t.Logf("PTR sweep → err=%v stdout=%q stderr=%q", err, stdout, stderr) + + alerts := waitAlerts(t, wl.Namespace) + t.Logf("=== %d alerts ===", len(alerts)) + logAlerts(t, alerts) + + // R0005 fires: PTR queries like "1.0.96.10.in-addr.arpa." are + // not in the NN egress. Multiple should fire (one per unique domain). + r0005Count := countByRule(alerts, "R0005") + t.Logf("R0005 count: %d", r0005Count) + require.Greater(t, r0005Count, 0, + "PTR sweep: reverse DNS queries must trigger R0005") + + // R0011 does NOT fire: all DNS traffic goes to the cluster DNS + // service (private IP), which is filtered by is_private_ip(). + assert.Equal(t, 0, countByRule(alerts, "R0011"), + "PTR sweep: no egress to public IPs — R0011 should not fire") + }) } // Test_29_SignedApplicationProfile verifies that a cryptographically signed From f3a47e425b481ed39481e043417d072def255ea4 Mon Sep 17 00:00:00 2001 From: entlein Date: Sun, 15 Mar 2026 20:56:55 +0100 Subject: [PATCH 06/18] more tests Signed-off-by: entlein --- tests/component_test.go | 240 ++++++++++++++++++++++++++++------ tests/testutils/log_alerts.go | 204 +++++++++++++++++++++++++++++ 2 files changed, 401 insertions(+), 43 deletions(-) create mode 100644 tests/testutils/log_alerts.go diff --git a/tests/component_test.go b/tests/component_test.go index 832a8790f..7da09b44e 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -2256,16 +2256,6 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { return wl } - countByRule := func(alerts []testutils.Alert, ruleID string) int { - n := 0 - for _, a := range alerts { - if a.Labels["rule_id"] == ruleID { - n++ - } - } - return n - } - waitAlerts := func(t *testing.T, ns string) []testutils.Alert { t.Helper() var alerts []testutils.Alert @@ -2289,6 +2279,42 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { } } + // getLogAlerts fetches the rich alert data from node-agent stdout logs. + getLogAlerts := func(t *testing.T, ns string) []testutils.LogAlert { + t.Helper() + la, err := testutils.GetLogAlerts(ns) + if err != nil { + t.Logf("warning: could not get log alerts: %v", err) + return nil + } + return la + } + + // logRichAlerts logs the rich alert details (domain, IP, port, proto). + logRichAlerts := func(t *testing.T, alerts []testutils.LogAlert) { + t.Helper() + for i, a := range alerts { + extra := "" + if a.Domain != "" { + extra += " domain=" + a.Domain + } + if a.DstIP != "" { + extra += " ip=" + a.DstIP + } + if a.Port != "" { + extra += " port=" + a.Port + } + if a.Proto != "" { + extra += " proto=" + a.Proto + } + if len(a.Addresses) > 0 { + extra += fmt.Sprintf(" addrs=%v", a.Addresses) + } + t.Logf(" [%d] %s(%s) comm=%s container=%s%s", + i, a.AlertName, a.RuleID, a.Comm, a.Container, extra) + } + } + // --------------------------------------------------------------- // 28a. Allowed traffic — fusioncore.ai is in the NN. // No R0005 (DNS) and no R0011 (egress) expected. @@ -2305,12 +2331,18 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { t.Logf("curl fusioncore.ai → err=%v stdout=%q stderr=%q", err, stdout, stderr) alerts := waitAlerts(t, wl.Namespace) - t.Logf("=== %d alerts ===", len(alerts)) + t.Logf("=== %d AlertManager alerts ===", len(alerts)) logAlerts(t, alerts) - assert.Equal(t, 0, countByRule(alerts, "R0005"), + richAlerts := getLogAlerts(t, wl.Namespace) + t.Logf("=== %d log alerts ===", len(richAlerts)) + logRichAlerts(t, richAlerts) + + r0005 := testutils.FilterLogAlerts(richAlerts, "R0005") + r0011 := testutils.FilterLogAlerts(richAlerts, "R0011") + assert.Empty(t, r0005, "fusioncore.ai is in NN — should NOT fire R0005") - assert.Equal(t, 0, countByRule(alerts, "R0011"), + assert.Empty(t, r0011, "fusioncore.ai IP is in NN — should NOT fire R0011") }) @@ -2328,11 +2360,28 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { wl.ExecIntoPod([]string{"curl", "-sm5", "http://cloudflare.com"}, "curl") alerts := waitAlerts(t, wl.Namespace) - t.Logf("=== %d alerts ===", len(alerts)) + t.Logf("=== %d AlertManager alerts ===", len(alerts)) logAlerts(t, alerts) - require.Greater(t, countByRule(alerts, "R0005"), 0, - "unknown domains must fire R0005") + richAlerts := getLogAlerts(t, wl.Namespace) + r0005 := testutils.FilterLogAlerts(richAlerts, "R0005") + t.Logf("=== R0005 alerts (%d) ===", len(r0005)) + logRichAlerts(t, r0005) + require.NotEmpty(t, r0005, "unknown domains must fire R0005") + + domains := testutils.LogAlertDomains(r0005) + t.Logf("R0005 anomalous domains: %v", domains) + + // Verify at least one of the queried domains triggered. + foundKnown := false + for _, d := range domains { + if strings.Contains(d, "google.com") || strings.Contains(d, "ebpf.io") || strings.Contains(d, "cloudflare.com") { + foundKnown = true + break + } + } + assert.True(t, foundKnown, + "R0005 must fire for at least one of the queried domains (google.com, ebpf.io, cloudflare.com), got: %v", domains) }) // --------------------------------------------------------------- @@ -2345,11 +2394,31 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { wl.ExecIntoPod([]string{"curl", "-sm5", "http://1.1.1.1"}, "curl") alerts := waitAlerts(t, wl.Namespace) - t.Logf("=== %d alerts ===", len(alerts)) + t.Logf("=== %d AlertManager alerts ===", len(alerts)) logAlerts(t, alerts) - require.Greater(t, countByRule(alerts, "R0011"), 0, - "IPs not in NN must fire R0011") + richAlerts := getLogAlerts(t, wl.Namespace) + r0011 := testutils.FilterLogAlerts(richAlerts, "R0011") + t.Logf("=== R0011 alerts (%d) ===", len(r0011)) + logRichAlerts(t, r0011) + require.NotEmpty(t, r0011, "IPs not in NN must fire R0011") + + ips := testutils.LogAlertIPs(r0011) + t.Logf("R0011 anomalous IPs: %v", ips) + + // Verify the specific IPs we curled show up. + foundGoogle := false + foundCloudflare := false + for _, ip := range ips { + if ip == "8.8.8.8" { + foundGoogle = true + } + if ip == "1.1.1.1" { + foundCloudflare = true + } + } + assert.True(t, foundGoogle || foundCloudflare, + "R0011 must fire for 8.8.8.8 or 1.1.1.1, got: %v", ips) }) // --------------------------------------------------------------- @@ -2376,11 +2445,27 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { t.Logf("curl MITM → err=%v stdout=%q stderr=%q", err, stdout, stderr) alerts := waitAlerts(t, wl.Namespace) - t.Logf("=== %d alerts ===", len(alerts)) + t.Logf("=== %d AlertManager alerts ===", len(alerts)) logAlerts(t, alerts) - require.Greater(t, countByRule(alerts, "R0011"), 0, - "MITM: fusioncore.ai allowed but spoofed IP 8.8.4.4 must fire R0011") + richAlerts := getLogAlerts(t, wl.Namespace) + r0011 := testutils.FilterLogAlerts(richAlerts, "R0011") + t.Logf("=== R0011 alerts (%d) ===", len(r0011)) + logRichAlerts(t, r0011) + require.NotEmpty(t, r0011, "MITM: fusioncore.ai allowed but spoofed IP 8.8.4.4 must fire R0011") + + ips := testutils.LogAlertIPs(r0011) + t.Logf("R0011 anomalous IPs (MITM spoofed): %v", ips) + + found := false + for _, ip := range ips { + if ip == "8.8.4.4" { + found = true + break + } + } + assert.True(t, found, + "R0011 must specifically fire for spoofed IP 8.8.4.4, got: %v", ips) }) // --------------------------------------------------------------- @@ -2492,19 +2577,30 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { t.Logf("nslookup (poisoned) → err=%v stdout=%q stderr=%q", err, stdout, stderr) alerts := waitAlerts(t, wl.Namespace) - t.Logf("=== %d alerts ===", len(alerts)) + t.Logf("=== %d AlertManager alerts ===", len(alerts)) logAlerts(t, alerts) + richAlerts := getLogAlerts(t, wl.Namespace) + logRichAlerts(t, richAlerts) + // R0005 does NOT fire: fusioncore.ai is already in the NN // egress list, and BusyBox nslookup does NOT perform PTR // reverse-lookups on result IPs, so no unknown domain is queried. - assert.Equal(t, 0, countByRule(alerts, "R0005"), + r0005 := testutils.FilterLogAlerts(richAlerts, "R0005") + if len(r0005) > 0 { + t.Logf("R0005 unexpected domains: %v", testutils.LogAlertDomains(r0005)) + } + assert.Empty(t, r0005, "DNS MITM: domain is in NN and no PTR lookup — R0005 should not fire") // R0011 does NOT fire: nslookup generates only DNS (UDP) // traffic to the cluster DNS service, which is a private IP // excluded by is_private_ip(). - assert.Equal(t, 0, countByRule(alerts, "R0011"), + r0011 := testutils.FilterLogAlerts(richAlerts, "R0011") + if len(r0011) > 0 { + t.Logf("R0011 unexpected IPs: %v", testutils.LogAlertIPs(r0011)) + } + assert.Empty(t, r0011, "DNS MITM: nslookup has no TCP egress — R0011 should not fire") }) @@ -2614,19 +2710,41 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { t.Logf("curl (poisoned DNS) → err=%v stdout=%q stderr=%q", err, stdout, stderr) alerts := waitAlerts(t, wl.Namespace) - t.Logf("=== %d alerts ===", len(alerts)) + t.Logf("=== %d AlertManager alerts ===", len(alerts)) logAlerts(t, alerts) + richAlerts := getLogAlerts(t, wl.Namespace) + // R0005 does NOT fire: fusioncore.ai is already in the NN // egress list, and curl (like BusyBox nslookup) does NOT // perform PTR reverse-lookups on resolved IPs. - assert.Equal(t, 0, countByRule(alerts, "R0005"), + r0005 := testutils.FilterLogAlerts(richAlerts, "R0005") + if len(r0005) > 0 { + t.Logf("R0005 unexpected domains: %v", testutils.LogAlertDomains(r0005)) + } + assert.Empty(t, r0005, "DNS MITM: domain is in NN and no PTR lookup — R0005 should not fire") // R0011 fires: TCP egress to 128.130.194.56 which is NOT // in the NN (NN only allows 162.0.217.171). - require.Greater(t, countByRule(alerts, "R0011"), 0, + r0011 := testutils.FilterLogAlerts(richAlerts, "R0011") + t.Logf("=== R0011 alerts (%d) ===", len(r0011)) + logRichAlerts(t, r0011) + require.NotEmpty(t, r0011, "DNS MITM: TCP to spoofed IP 128.130.194.56 must fire R0011") + + ips := testutils.LogAlertIPs(r0011) + t.Logf("R0011 anomalous IPs (poisoned DNS TCP): %v", ips) + + found := false + for _, ip := range ips { + if ip == "128.130.194.56" { + found = true + break + } + } + assert.True(t, found, + "R0011 must specifically fire for poisoned IP 128.130.194.56, got: %v", ips) }) // --------------------------------------------------------------- @@ -2660,36 +2778,72 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { require.Len(t, parts, 4, "clusterIP must be IPv4") base := strings.Join(parts[:3], ".") - // Build a sweep command: reverse-lookup 20 IPs in the service CIDR. + // Build a sweep command: reverse-lookup all 254 IPs in the service CIDR. // BusyBox nslookup does a PTR query when given an IP address. // Each PTR query generates event.name = ".in-addr.arpa." // which is NOT in the NN → R0005 fires. - var sweepCmd strings.Builder - for i := 1; i <= 20; i++ { - if i > 1 { - sweepCmd.WriteString("; ") + // + // We batch into groups of 50 to avoid command-line length limits + // and exec timeouts. + batchSize := 50 + for batchStart := 1; batchStart <= 254; batchStart += batchSize { + batchEnd := batchStart + batchSize - 1 + if batchEnd > 254 { + batchEnd = 254 } - sweepCmd.WriteString(fmt.Sprintf("nslookup %s.%d 2>/dev/null || true", base, i)) + var sweepCmd strings.Builder + for i := batchStart; i <= batchEnd; i++ { + if i > batchStart { + sweepCmd.WriteString("; ") + } + sweepCmd.WriteString(fmt.Sprintf("nslookup %s.%d 2>/dev/null || true", base, i)) + } + stdout, stderr, err := wl.ExecIntoPod( + []string{"sh", "-c", sweepCmd.String()}, "curl") + t.Logf("PTR sweep [%d-%d] → err=%v stdout_len=%d stderr_len=%d", + batchStart, batchEnd, err, len(stdout), len(stderr)) } - stdout, stderr, err := wl.ExecIntoPod( - []string{"sh", "-c", sweepCmd.String()}, "curl") - t.Logf("PTR sweep → err=%v stdout=%q stderr=%q", err, stdout, stderr) - alerts := waitAlerts(t, wl.Namespace) - t.Logf("=== %d alerts ===", len(alerts)) + t.Logf("=== %d AlertManager alerts ===", len(alerts)) logAlerts(t, alerts) + richAlerts := getLogAlerts(t, wl.Namespace) + // R0005 fires: PTR queries like "1.0.96.10.in-addr.arpa." are // not in the NN egress. Multiple should fire (one per unique domain). - r0005Count := countByRule(alerts, "R0005") - t.Logf("R0005 count: %d", r0005Count) - require.Greater(t, r0005Count, 0, + r0005 := testutils.FilterLogAlerts(richAlerts, "R0005") + t.Logf("=== R0005 alerts (%d) ===", len(r0005)) + logRichAlerts(t, r0005) + require.NotEmpty(t, r0005, "PTR sweep: reverse DNS queries must trigger R0005") + domains := testutils.LogAlertDomains(r0005) + t.Logf("R0005 anomalous domains (%d unique): %v", len(domains), domains) + + // Verify the alerts contain in-addr.arpa PTR domains. + ptrCount := 0 + for _, d := range domains { + if strings.Contains(d, "in-addr.arpa") { + ptrCount++ + } + } + t.Logf("R0005 PTR domains (in-addr.arpa): %d out of %d", ptrCount, len(domains)) + assert.Greater(t, ptrCount, 0, + "R0005 must contain in-addr.arpa PTR domains from the sweep") + + // With 254 lookups we expect a meaningful number of R0005 alerts. + // Exact count depends on dedup, but should be more than 1. + assert.Greater(t, len(r0005), 1, + "PTR sweep of 254 IPs should produce multiple R0005 alerts, got %d", len(r0005)) + // R0011 does NOT fire: all DNS traffic goes to the cluster DNS // service (private IP), which is filtered by is_private_ip(). - assert.Equal(t, 0, countByRule(alerts, "R0011"), + r0011 := testutils.FilterLogAlerts(richAlerts, "R0011") + if len(r0011) > 0 { + t.Logf("R0011 unexpected IPs: %v", testutils.LogAlertIPs(r0011)) + } + assert.Empty(t, r0011, "PTR sweep: no egress to public IPs — R0011 should not fire") }) } diff --git a/tests/testutils/log_alerts.go b/tests/testutils/log_alerts.go new file mode 100644 index 000000000..70d11ec31 --- /dev/null +++ b/tests/testutils/log_alerts.go @@ -0,0 +1,204 @@ +package testutils + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "strings" + + "github.com/kubescape/k8s-interface/k8sinterface" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" +) + +// LogAlert represents a full alert parsed from node-agent stdout exporter logs. +type LogAlert struct { + RuleID string + AlertName string + Namespace string + Container string + PodName string + Comm string + // DNS-specific (R0005) + Domain string + Addresses []string + // Network-specific (R0011) + DstIP string + Port string + Proto string + // Full message from arguments + Message string +} + +// GetLogAlerts reads node-agent pod logs, parses the stdout exporter JSON, +// and returns alerts filtered by namespace. +func GetLogAlerts(namespace string) ([]LogAlert, error) { + k8sClient := k8sinterface.NewKubernetesApi() + + pods, err := k8sClient.KubernetesClient.CoreV1().Pods("").List( + context.TODO(), metav1.ListOptions{ + LabelSelector: "app=node-agent", + }) + if err != nil { + return nil, fmt.Errorf("list node-agent pods: %w", err) + } + + var alerts []LogAlert + for _, pod := range pods.Items { + buf := &bytes.Buffer{} + req := k8sClient.KubernetesClient.CoreV1().RESTClient(). + Get(). + Namespace(pod.Namespace). + Name(pod.Name). + Resource("pods"). + SubResource("log"). + VersionedParams(&v1.PodLogOptions{ + Container: "node-agent", + }, scheme.ParameterCodec) + + rc, err := req.Stream(context.TODO()) + if err != nil { + return nil, fmt.Errorf("stream logs for %s: %w", pod.Name, err) + } + _, err = io.Copy(buf, rc) + rc.Close() + if err != nil { + return nil, fmt.Errorf("read logs for %s: %w", pod.Name, err) + } + + parsed := parseLogAlerts(buf.String(), namespace) + alerts = append(alerts, parsed...) + } + return alerts, nil +} + +// parseLogAlerts extracts alert entries from logrus JSON lines. +func parseLogAlerts(logs, namespace string) []LogAlert { + var alerts []LogAlert + for _, line := range strings.Split(logs, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + + var entry map[string]interface{} + if err := json.Unmarshal([]byte(line), &entry); err != nil { + continue + } + + ruleID, _ := entry["RuleID"].(string) + if ruleID == "" { + continue + } + + // Extract RuntimeK8sDetails + k8s, _ := entry["RuntimeK8sDetails"].(map[string]interface{}) + ns, _ := k8s["namespace"].(string) + if namespace != "" && ns != namespace { + continue + } + + container, _ := k8s["containerName"].(string) + podName, _ := k8s["podName"].(string) + + // Extract BaseRuntimeMetadata + base, _ := entry["BaseRuntimeMetadata"].(map[string]interface{}) + alertName, _ := base["alertName"].(string) + args, _ := base["arguments"].(map[string]interface{}) + + // Extract identifiers for comm + comm := "" + if ids, ok := base["identifiers"].(map[string]interface{}); ok { + if proc, ok := ids["process"].(map[string]interface{}); ok { + comm, _ = proc["name"].(string) + } + } + + la := LogAlert{ + RuleID: ruleID, + AlertName: alertName, + Namespace: ns, + Container: container, + PodName: podName, + Comm: comm, + } + + if args != nil { + la.Domain, _ = args["domain"].(string) + la.Message, _ = args["message"].(string) + la.Proto, _ = args["protocol"].(string) + if la.Proto == "" { + la.Proto, _ = args["proto"].(string) + } + + // IP: from arguments.ip (R0011) or identifiers.network.dstIP (R0005) + if ip, ok := args["ip"].(string); ok { + la.DstIP = ip + } else if ids, ok := base["identifiers"].(map[string]interface{}); ok { + if net, ok := ids["network"].(map[string]interface{}); ok { + la.DstIP, _ = net["dstIP"].(string) + } + } + + // Port + switch p := args["port"].(type) { + case float64: + la.Port = fmt.Sprintf("%d", int(p)) + case string: + la.Port = p + } + + // Addresses (DNS) + if addrs, ok := args["addresses"].([]interface{}); ok { + for _, a := range addrs { + if s, ok := a.(string); ok { + la.Addresses = append(la.Addresses, s) + } + } + } + } + + alerts = append(alerts, la) + } + return alerts +} + +// FilterLogAlerts returns alerts matching the given rule ID. +func FilterLogAlerts(alerts []LogAlert, ruleID string) []LogAlert { + var out []LogAlert + for _, a := range alerts { + if a.RuleID == ruleID { + out = append(out, a) + } + } + return out +} + +// LogAlertDomains returns unique domain values from a set of log alerts. +func LogAlertDomains(alerts []LogAlert) []string { + seen := map[string]bool{} + var out []string + for _, a := range alerts { + if a.Domain != "" && !seen[a.Domain] { + seen[a.Domain] = true + out = append(out, a.Domain) + } + } + return out +} + +// LogAlertIPs returns unique DstIP values from a set of log alerts. +func LogAlertIPs(alerts []LogAlert) []string { + seen := map[string]bool{} + var out []string + for _, a := range alerts { + if a.DstIP != "" && !seen[a.DstIP] { + seen[a.DstIP] = true + out = append(out, a.DstIP) + } + } + return out +} From 74f8e374101e5a8671500ed676a3ab6c17ae86bd Mon Sep 17 00:00:00 2001 From: entlein Date: Sun, 15 Mar 2026 21:02:46 +0100 Subject: [PATCH 07/18] trigger comp test Signed-off-by: entlein --- .github/workflows/component-tests.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/component-tests.yaml b/.github/workflows/component-tests.yaml index ff38bf2c4..481b13475 100644 --- a/.github/workflows/component-tests.yaml +++ b/.github/workflows/component-tests.yaml @@ -30,9 +30,7 @@ name: Node Agent Component Tests on: push: branches: - - feat/signature-verification - - feat/tamperalert - - feat/tamper-detection + - feature/collapse-config-crd workflow_dispatch: inputs: build_image: @@ -54,7 +52,8 @@ on: description: 'Branch/tag/commit of k8sstormcenter/storage to use (leave empty to keep go.mod default)' type: string required: false - default: 'a042ebaa0ec9280d69eac81b5eeaa4d0dfd1c558' + #default: 'a042ebaa0ec9280d69eac81b5eeaa4d0dfd1c558' # that was before rebaseing storage + default: 'e64d59a0e65e891b832f4f29bf770059ff0144b3' #in collapse crd concurrency: group: ${{ github.workflow }}-${{ github.ref }} From 8ae8f9300170c2959fe09522803a39f6c1156b2b Mon Sep 17 00:00:00 2001 From: entlein Date: Sun, 15 Mar 2026 22:18:55 +0100 Subject: [PATCH 08/18] mreging suggested randomx fix from Yakir to test Signed-off-by: entlein --- cmd/main.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cmd/main.go b/cmd/main.go index 494e506cc..86ce54062 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -227,6 +227,13 @@ func main() { rulesWatcher := ruleswatcher.NewRulesWatcher(k8sClient, ruleCreator, func() { ruleBindingCache.RefreshRuleBindingsRules() }, &cfg) + // Synchronous initial sync so rules are available before tracers start, + // preventing early events from being silently dropped. + if err := rulesWatcher.InitialSync(ctx); err != nil { + logger.L().Ctx(ctx).Warning("failed to perform initial rules sync, early events may be missed", helpers.Error(err)) + } else { + ruleBindingCache.RefreshRuleBindingsRules() + } dWatcher.AddAdaptor(rulesWatcher) } From 1e6e859c65ec62f412aee9d38cca79781de0b550 Mon Sep 17 00:00:00 2001 From: entlein Date: Sun, 15 Mar 2026 22:56:14 +0100 Subject: [PATCH 09/18] replace sotrage dependency; Signed-off-by: entlein --- .github/workflows/component-tests.yaml | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/.github/workflows/component-tests.yaml b/.github/workflows/component-tests.yaml index 481b13475..7fa5d2cc0 100644 --- a/.github/workflows/component-tests.yaml +++ b/.github/workflows/component-tests.yaml @@ -128,6 +128,24 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.25" + + - name: Update storage dependency + if: ${{ inputs.STORAGE_REF != '' }} + env: + STORAGE_REF: ${{ inputs.STORAGE_REF }} + GONOSUMCHECK: "*" + GOFLAGS: "" + run: | + echo "Replacing github.com/kubescape/storage with github.com/k8sstormcenter/storage@${STORAGE_REF}" + go mod edit -replace "github.com/kubescape/storage=github.com/k8sstormcenter/storage@${STORAGE_REF}" + go mod tidy + echo "Resolved storage version:" + grep "k8sstormcenter/storage" go.sum | head -1 + - name: Install IG run: | sudo apt-get update @@ -274,9 +292,9 @@ jobs: run: | sudo sh -c "ulimit -l unlimited" - name: Update storage dependency - #if: ${{ inputs.STORAGE_REF != '' && inputs.STORAGE_REF != 'latest' }} + if: ${{ inputs.STORAGE_REF != '' }} env: - STORAGE_REF: ${{ inputs.STORAGE_REF || 'a042ebaa0ec9280d69eac81b5eeaa4d0dfd1c558' }} + STORAGE_REF: ${{ inputs.STORAGE_REF }} GONOSUMCHECK: "*" GOFLAGS: "" run: | From e18940fabf7e52f4bad641393229f4393d64d255 Mon Sep 17 00:00:00 2001 From: entlein Date: Sun, 15 Mar 2026 22:57:46 +0100 Subject: [PATCH 10/18] replace sotrage dependency; Signed-off-by: entlein --- cmd/main.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cmd/main.go b/cmd/main.go index 86ce54062..c8c118c65 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -96,7 +96,6 @@ func main() { clusterData.AccountID = credentials.Account logger.L().Info("credentials loaded", helpers.Int("accountLength", len(credentials.Account))) } - // to enable otel, set OTEL_COLLECTOR_SVC=otel-collector:4317 if otelHost, present := os.LookupEnv("OTEL_COLLECTOR_SVC"); present { ctx = logger.InitOtel("node-agent", From b1165eff25c4c501f118a5c5097e41738bf7b741 Mon Sep 17 00:00:00 2001 From: entlein Date: Sun, 15 Mar 2026 23:05:33 +0100 Subject: [PATCH 11/18] replace sotrage dependency; Signed-off-by: entlein --- .github/workflows/component-tests.yaml | 1 - cmd/main.go | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/component-tests.yaml b/.github/workflows/component-tests.yaml index 7fa5d2cc0..4510ecfb7 100644 --- a/.github/workflows/component-tests.yaml +++ b/.github/workflows/component-tests.yaml @@ -134,7 +134,6 @@ jobs: go-version: "1.25" - name: Update storage dependency - if: ${{ inputs.STORAGE_REF != '' }} env: STORAGE_REF: ${{ inputs.STORAGE_REF }} GONOSUMCHECK: "*" diff --git a/cmd/main.go b/cmd/main.go index c8c118c65..86ce54062 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -96,6 +96,7 @@ func main() { clusterData.AccountID = credentials.Account logger.L().Info("credentials loaded", helpers.Int("accountLength", len(credentials.Account))) } + // to enable otel, set OTEL_COLLECTOR_SVC=otel-collector:4317 if otelHost, present := os.LookupEnv("OTEL_COLLECTOR_SVC"); present { ctx = logger.InitOtel("node-agent", From fd4dca600ba50850e6394d4b801f71701adc89ec Mon Sep 17 00:00:00 2001 From: entlein Date: Sun, 15 Mar 2026 23:12:57 +0100 Subject: [PATCH 12/18] replace sotrage dependency; Signed-off-by: entlein --- .github/workflows/component-tests.yaml | 5 ++--- cmd/main.go | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/component-tests.yaml b/.github/workflows/component-tests.yaml index 4510ecfb7..2a59def39 100644 --- a/.github/workflows/component-tests.yaml +++ b/.github/workflows/component-tests.yaml @@ -135,7 +135,7 @@ jobs: - name: Update storage dependency env: - STORAGE_REF: ${{ inputs.STORAGE_REF }} + STORAGE_REF: ${{ inputs.STORAGE_REF || 'e64d59a0e65e891b832f4f29bf770059ff0144b3' }} GONOSUMCHECK: "*" GOFLAGS: "" run: | @@ -291,9 +291,8 @@ jobs: run: | sudo sh -c "ulimit -l unlimited" - name: Update storage dependency - if: ${{ inputs.STORAGE_REF != '' }} env: - STORAGE_REF: ${{ inputs.STORAGE_REF }} + STORAGE_REF: ${{ inputs.STORAGE_REF || 'e64d59a0e65e891b832f4f29bf770059ff0144b3' }} GONOSUMCHECK: "*" GOFLAGS: "" run: | diff --git a/cmd/main.go b/cmd/main.go index 86ce54062..c8c118c65 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -96,7 +96,6 @@ func main() { clusterData.AccountID = credentials.Account logger.L().Info("credentials loaded", helpers.Int("accountLength", len(credentials.Account))) } - // to enable otel, set OTEL_COLLECTOR_SVC=otel-collector:4317 if otelHost, present := os.LookupEnv("OTEL_COLLECTOR_SVC"); present { ctx = logger.InitOtel("node-agent", From fbaa00fdd9e2f6a300883a3e8c2f06ee2d211cf3 Mon Sep 17 00:00:00 2001 From: entlein Date: Mon, 16 Mar 2026 16:27:13 +0100 Subject: [PATCH 13/18] revert randomx fix Signed-off-by: entlein --- cmd/main.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index c8c118c65..59d64bc03 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -226,13 +226,6 @@ func main() { rulesWatcher := ruleswatcher.NewRulesWatcher(k8sClient, ruleCreator, func() { ruleBindingCache.RefreshRuleBindingsRules() }, &cfg) - // Synchronous initial sync so rules are available before tracers start, - // preventing early events from being silently dropped. - if err := rulesWatcher.InitialSync(ctx); err != nil { - logger.L().Ctx(ctx).Warning("failed to perform initial rules sync, early events may be missed", helpers.Error(err)) - } else { - ruleBindingCache.RefreshRuleBindingsRules() - } dWatcher.AddAdaptor(rulesWatcher) } From 47cde540d742211abcf26d7e0203d695fb8c449d Mon Sep 17 00:00:00 2001 From: entlein Date: Mon, 16 Mar 2026 17:56:20 +0100 Subject: [PATCH 14/18] new randomx fix, for kernel 6.1 Signed-off-by: entlein --- pkg/ebpf/gadgets/randomx/program.bpf.c | 267 ++++++------------------- 1 file changed, 58 insertions(+), 209 deletions(-) diff --git a/pkg/ebpf/gadgets/randomx/program.bpf.c b/pkg/ebpf/gadgets/randomx/program.bpf.c index 46e7425e2..22db11ce6 100644 --- a/pkg/ebpf/gadgets/randomx/program.bpf.c +++ b/pkg/ebpf/gadgets/randomx/program.bpf.c @@ -28,7 +28,27 @@ #define TARGET_RANDOMX_EVENTS_COUNT 5 // 5 seconds in nanoseconds -#define MAX_NS_BETWEEN_EVENTS 5000000000ULL +#define MAX_NS_BETWEEN_EVENTS 5000000000ULL + +// RandomX MXCSR fingerprint detection. +// RandomX (used by XMR miners) configures SSE via MXCSR with: +// - Flush-to-zero (FZ, bit 15) +// - Denormals-are-zero (DAZ, bit 6) +// - All exception masks set (bits 7-12) +// - Rounding mode varies (bits 13-14): may be round-to-nearest (00) +// or round-to-zero (11) depending on RandomX execution step. +// +// Observed xmrig MXCSR values: 0x9fe0 (FZ+DAZ+masks, RC=00) +// 0xffe0 (FZ+DAZ+masks, RC=11) +// Normal process defaults: 0x1f80 (masks only) +// 0x1fa0 (masks + DAZ, set by some runtimes) +// +// Detection: FZ bit (0x8000) is almost never set by normal applications. +// When FZ is set together with DAZ (0x0040), this is a strong signal of +// RandomX-style numeric processing. We require both bits. +#define MXCSR_FZ 0x8000 // Flush-to-zero (bit 15) +#define MXCSR_DAZ 0x0040 // Denormals-are-zero (bit 6) +#define RANDOMX_MXCSR_MASK (MXCSR_FZ | MXCSR_DAZ) // This struct will hold the state for each mount namespace struct mntns_cache { @@ -41,7 +61,7 @@ struct mntns_cache { // key: mntns_id (u64), value: struct mntns_cache struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __uint(max_entries, 1024); + __uint(max_entries, 1024); __type(key, u64); __type(value, struct mntns_cache); } mntns_event_count SEC(".maps"); @@ -71,229 +91,83 @@ struct old_fpu { }; #endif -SEC("tracepoint/x86_fpu/x86_fpu_regs_deactivated") -int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) +// Read MXCSR from the FPU struct, handling different kernel layouts. +static __always_inline int read_mxcsr(void *fpu, u32 *out) { - if (gadget_should_discard_data_current()) { - return 0; + if (LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0)) { + return bpf_probe_read_kernel(out, sizeof(*out), + &((struct old_fpu *)fpu)->state.xsave.i387.mxcsr); } + *out = BPF_CORE_READ((struct fpu *)fpu, fpstate, regs.xsave.i387.mxcsr); + return 0; +} - u64 mntns_id; - mntns_id = gadget_get_current_mntns_id(); - struct mntns_cache *cache; - cache = bpf_map_lookup_elem(&mntns_event_count, &mntns_id); - - u64 now = bpf_ktime_get_ns(); - - if (!cache) { - // First event for this mntns. Create a new entry. - struct mntns_cache new_cache = {}; - new_cache.timestamp = now; - new_cache.events_count = 1; - new_cache.alerted = false; - bpf_map_update_elem(&mntns_event_count, &mntns_id, &new_cache, BPF_ANY); - return 0; // Don't send an event yet - } - - // If we have already sent an alert for this mntns, do nothing. - if (cache->alerted) { - return 0; - } - - // Check if the last event was too long ago and reset if necessary. - if (now - cache->timestamp > MAX_NS_BETWEEN_EVENTS) { - cache->timestamp = now; - cache->events_count = 1; - bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); - return 0; // Don't send an event yet - } - - // Increment the count. Using bpf_map_update_elem is not atomic, but for - // this use case (a single CPU tracepoint), it's safe. - cache->events_count++; - cache->timestamp = now; // Update timestamp with the latest event - - // Check if we have seen enough events - if (cache->events_count <= TARGET_RANDOMX_EVENTS_COUNT) { - // Not enough events yet, just update the map and exit. - bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); - return 0; - } - - // --- Threshold has been reached! --- - // We only reach this point ONCE per mntns. - - // Mark as alerted to prevent sending more events for this mntns. - cache->alerted = true; - bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); +// Check if the MXCSR value matches the RandomX fingerprint. +// Returns true when both FZ and DAZ bits are set — the hallmark of +// RandomX-style SSE configuration that normal workloads don't use. +static __always_inline bool is_randomx_mxcsr(u32 mxcsr) +{ + return (mxcsr & RANDOMX_MXCSR_MASK) == RANDOMX_MXCSR_MASK; +} - struct event *event; - event = gadget_reserve_buf(&events, sizeof(*event)); - if (!event) { +SEC("tracepoint/x86_fpu/x86_fpu_regs_deactivated") +int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) +{ + if (gadget_should_discard_data_current()) { return 0; } - - // Populate the event with data. This code is the same as before. - gadget_process_populate(&event->proc); + // --- Read MXCSR early and bail out for normal processes --- void *fpu = BPF_CORE_READ(ctx, fpu); if (fpu == NULL) { - gadget_discard_buf(event); return 0; } u32 mxcsr; - if(LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0)) { - bpf_probe_read_kernel(&mxcsr, sizeof(mxcsr), &((struct old_fpu*)fpu)->state.xsave.i387.mxcsr); - } else { - mxcsr = BPF_CORE_READ((struct fpu*)fpu, fpstate, regs.xsave.i387.mxcsr); - } - - int fpcr = (mxcsr & 0x6000) >> 13; - if (fpcr != 0) { - event->upper_layer = has_upper_layer(); - read_exe_path(event->exepath, sizeof(event->exepath)); - - event->timestamp_raw = bpf_ktime_get_boot_ns(); - - gadget_submit_buf(ctx, &events, event, sizeof(*event)); - } else { - gadget_discard_buf(event); + if (read_mxcsr(fpu, &mxcsr) < 0) { + return 0; } - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; - -#endif // defined(__TARGET_ARCH_x86) - -/* // Kernel types definitions -#include - -// eBPF helpers signatures -// Check https://man7.org/linux/man-pages/man7/bpf-helpers.7.html to learn -// more about different available helpers -#include -#include - -// Inspektor Gadget buffer -#include -// Helpers to handle common data -#include -// Inspektor Gadget macros -#include -// Inspektor Gadget filtering -#include -// Inspektor Gadget types -#include -// Inspektor Gadget mntns -#include - -#include "program.h" -#include "upper_layer.h" -#include "exe_path.h" - -#if defined(__TARGET_ARCH_x86) - -#define TARGET_RANDOMX_EVENTS_COUNT 5 -// 5 seconds in nanoseconds -#define MAX_NS_BETWEEN_EVENTS 5000000000ULL - -// This struct will hold the state for each mount namespace -struct mntns_cache { - u64 timestamp; - u64 events_count; - bool alerted; -}; - -// A map to store the cache per mntns_id. -// key: mntns_id (u64), value: struct mntns_cache -struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); - __uint(max_entries, 1024); - __type(key, u64); - __type(value, struct mntns_cache); -} mntns_event_count SEC(".maps"); - -// events is the name of the buffer map and 1024 * 256 (256KB) is its size. -GADGET_TRACER_MAP(events, 1024 * 256); - -// Define a tracer -GADGET_TRACER(randomx, events, event); - -// Utilize the kernel version provided by libbpf. (kconfig must be present). -extern int LINUX_KERNEL_VERSION __kconfig; - -#if LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0) -struct old_fpu { - unsigned int last_cpu; - unsigned char initialized; - long: 24; - long: 64; - long: 64; - long: 64; - long: 64; - long: 64; - long: 64; - long: 64; - union fpregs_state state; -}; -#endif - -SEC("tracepoint/x86_fpu/x86_fpu_regs_deactivated") -int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) -{ - if (gadget_should_discard_data_current()) { + if (!is_randomx_mxcsr(mxcsr)) { return 0; } - u64 mntns_id; - mntns_id = gadget_get_current_mntns_id(); + // --- MXCSR looks like RandomX. Count events per mount namespace --- + u64 mntns_id = gadget_get_current_mntns_id(); struct mntns_cache *cache; cache = bpf_map_lookup_elem(&mntns_event_count, &mntns_id); u64 now = bpf_ktime_get_ns(); if (!cache) { - // First event for this mntns. Create a new entry. struct mntns_cache new_cache = {}; new_cache.timestamp = now; new_cache.events_count = 1; new_cache.alerted = false; bpf_map_update_elem(&mntns_event_count, &mntns_id, &new_cache, BPF_ANY); - return 0; // Don't send an event yet + return 0; } - // If we have already sent an alert for this mntns, do nothing. if (cache->alerted) { return 0; } - // Check if the last event was too long ago and reset if necessary. if (now - cache->timestamp > MAX_NS_BETWEEN_EVENTS) { cache->timestamp = now; cache->events_count = 1; bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); - return 0; // Don't send an event yet + return 0; } - - // Increment the count. Using bpf_map_update_elem is not atomic, but for - // this use case (a single CPU tracepoint), it's safe. + cache->events_count++; - cache->timestamp = now; // Update timestamp with the latest event + cache->timestamp = now; - // Check if we have seen enough events if (cache->events_count <= TARGET_RANDOMX_EVENTS_COUNT) { - // Not enough events yet, just update the map and exit. bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); return 0; } - // --- Threshold has been reached! --- - // We only reach this point ONCE per mntns. - - // Mark as alerted to prevent sending more events for this mntns. + // --- Threshold reached — emit alert once per mntns --- cache->alerted = true; bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); @@ -302,40 +176,15 @@ int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) if (!event) { return 0; } - - // Populate the event with data. This code is the same as before. - gadget_process_populate(&event->proc); - - void *fpu = BPF_CORE_READ(ctx, fpu); - if (fpu == NULL) { - gadget_discard_buf(event); - return 0; - } - - u32 mxcsr; - if(LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0)) { - bpf_probe_read_kernel(&mxcsr, sizeof(mxcsr), &((struct old_fpu*)fpu)->state.xsave.i387.mxcsr); - } else { - mxcsr = BPF_CORE_READ((struct fpu*)fpu, fpstate, regs.xsave.i387.mxcsr); - } - - int fpcr = (mxcsr & 0x6000) >> 13; - if (fpcr != 0) { - event->upper_layer = has_upper_layer(); - read_exe_path(event->exepath, sizeof(event->exepath)); - event->timestamp_raw = bpf_ktime_get_boot_ns(); + gadget_process_populate(&event->proc); + event->upper_layer = has_upper_layer(); + read_exe_path(event->exepath, sizeof(event->exepath)); + event->timestamp_raw = bpf_ktime_get_boot_ns(); - gadget_submit_buf(ctx, &events, event, sizeof(*event)); - } else { - gadget_discard_buf(event); - } + gadget_submit_buf(ctx, &events, event, sizeof(*event)); return 0; } -char LICENSE[] SEC("license") = "GPL"; - -#endif // defined(__TARGET_ARCH_x86) - - */ \ No newline at end of file +char LICENSE[] SEC("license") = "GPL"; \ No newline at end of file From 139ae8abd51a91685a2913bdf7d8c188894e9f66 Mon Sep 17 00:00:00 2001 From: entlein Date: Mon, 16 Mar 2026 19:02:23 +0100 Subject: [PATCH 15/18] end of IF missing Signed-off-by: entlein --- pkg/ebpf/gadgets/randomx/program.bpf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/ebpf/gadgets/randomx/program.bpf.c b/pkg/ebpf/gadgets/randomx/program.bpf.c index 22db11ce6..44800d50c 100644 --- a/pkg/ebpf/gadgets/randomx/program.bpf.c +++ b/pkg/ebpf/gadgets/randomx/program.bpf.c @@ -187,4 +187,6 @@ int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) return 0; } -char LICENSE[] SEC("license") = "GPL"; \ No newline at end of file +char LICENSE[] SEC("license") = "GPL"; + +#endif // defined(__TARGET_ARCH_x86) \ No newline at end of file From 6822a66faf08d174fb6097d12b24f0351215492a Mon Sep 17 00:00:00 2001 From: entlein Date: Tue, 17 Mar 2026 12:09:14 +0100 Subject: [PATCH 16/18] restoring original randomx gadget Signed-off-by: entlein --- pkg/ebpf/gadgets/randomx/program.bpf.c | 265 +++++++++++++++++++------ 1 file changed, 207 insertions(+), 58 deletions(-) diff --git a/pkg/ebpf/gadgets/randomx/program.bpf.c b/pkg/ebpf/gadgets/randomx/program.bpf.c index 44800d50c..46e7425e2 100644 --- a/pkg/ebpf/gadgets/randomx/program.bpf.c +++ b/pkg/ebpf/gadgets/randomx/program.bpf.c @@ -28,27 +28,7 @@ #define TARGET_RANDOMX_EVENTS_COUNT 5 // 5 seconds in nanoseconds -#define MAX_NS_BETWEEN_EVENTS 5000000000ULL - -// RandomX MXCSR fingerprint detection. -// RandomX (used by XMR miners) configures SSE via MXCSR with: -// - Flush-to-zero (FZ, bit 15) -// - Denormals-are-zero (DAZ, bit 6) -// - All exception masks set (bits 7-12) -// - Rounding mode varies (bits 13-14): may be round-to-nearest (00) -// or round-to-zero (11) depending on RandomX execution step. -// -// Observed xmrig MXCSR values: 0x9fe0 (FZ+DAZ+masks, RC=00) -// 0xffe0 (FZ+DAZ+masks, RC=11) -// Normal process defaults: 0x1f80 (masks only) -// 0x1fa0 (masks + DAZ, set by some runtimes) -// -// Detection: FZ bit (0x8000) is almost never set by normal applications. -// When FZ is set together with DAZ (0x0040), this is a strong signal of -// RandomX-style numeric processing. We require both bits. -#define MXCSR_FZ 0x8000 // Flush-to-zero (bit 15) -#define MXCSR_DAZ 0x0040 // Denormals-are-zero (bit 6) -#define RANDOMX_MXCSR_MASK (MXCSR_FZ | MXCSR_DAZ) +#define MAX_NS_BETWEEN_EVENTS 5000000000ULL // This struct will hold the state for each mount namespace struct mntns_cache { @@ -61,7 +41,7 @@ struct mntns_cache { // key: mntns_id (u64), value: struct mntns_cache struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __uint(max_entries, 1024); + __uint(max_entries, 1024); __type(key, u64); __type(value, struct mntns_cache); } mntns_event_count SEC(".maps"); @@ -91,25 +71,6 @@ struct old_fpu { }; #endif -// Read MXCSR from the FPU struct, handling different kernel layouts. -static __always_inline int read_mxcsr(void *fpu, u32 *out) -{ - if (LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0)) { - return bpf_probe_read_kernel(out, sizeof(*out), - &((struct old_fpu *)fpu)->state.xsave.i387.mxcsr); - } - *out = BPF_CORE_READ((struct fpu *)fpu, fpstate, regs.xsave.i387.mxcsr); - return 0; -} - -// Check if the MXCSR value matches the RandomX fingerprint. -// Returns true when both FZ and DAZ bits are set — the hallmark of -// RandomX-style SSE configuration that normal workloads don't use. -static __always_inline bool is_randomx_mxcsr(u32 mxcsr) -{ - return (mxcsr & RANDOMX_MXCSR_MASK) == RANDOMX_MXCSR_MASK; -} - SEC("tracepoint/x86_fpu/x86_fpu_regs_deactivated") int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) { @@ -117,57 +78,222 @@ int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) return 0; } - // --- Read MXCSR early and bail out for normal processes --- + u64 mntns_id; + mntns_id = gadget_get_current_mntns_id(); + struct mntns_cache *cache; + cache = bpf_map_lookup_elem(&mntns_event_count, &mntns_id); + + u64 now = bpf_ktime_get_ns(); + + if (!cache) { + // First event for this mntns. Create a new entry. + struct mntns_cache new_cache = {}; + new_cache.timestamp = now; + new_cache.events_count = 1; + new_cache.alerted = false; + bpf_map_update_elem(&mntns_event_count, &mntns_id, &new_cache, BPF_ANY); + return 0; // Don't send an event yet + } + + // If we have already sent an alert for this mntns, do nothing. + if (cache->alerted) { + return 0; + } + + // Check if the last event was too long ago and reset if necessary. + if (now - cache->timestamp > MAX_NS_BETWEEN_EVENTS) { + cache->timestamp = now; + cache->events_count = 1; + bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); + return 0; // Don't send an event yet + } + + // Increment the count. Using bpf_map_update_elem is not atomic, but for + // this use case (a single CPU tracepoint), it's safe. + cache->events_count++; + cache->timestamp = now; // Update timestamp with the latest event + + // Check if we have seen enough events + if (cache->events_count <= TARGET_RANDOMX_EVENTS_COUNT) { + // Not enough events yet, just update the map and exit. + bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); + return 0; + } + + // --- Threshold has been reached! --- + // We only reach this point ONCE per mntns. + + // Mark as alerted to prevent sending more events for this mntns. + cache->alerted = true; + bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); + + struct event *event; + event = gadget_reserve_buf(&events, sizeof(*event)); + if (!event) { + return 0; + } + + // Populate the event with data. This code is the same as before. + gadget_process_populate(&event->proc); + void *fpu = BPF_CORE_READ(ctx, fpu); if (fpu == NULL) { + gadget_discard_buf(event); return 0; } u32 mxcsr; - if (read_mxcsr(fpu, &mxcsr) < 0) { - return 0; + if(LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0)) { + bpf_probe_read_kernel(&mxcsr, sizeof(mxcsr), &((struct old_fpu*)fpu)->state.xsave.i387.mxcsr); + } else { + mxcsr = BPF_CORE_READ((struct fpu*)fpu, fpstate, regs.xsave.i387.mxcsr); + } + + int fpcr = (mxcsr & 0x6000) >> 13; + if (fpcr != 0) { + event->upper_layer = has_upper_layer(); + read_exe_path(event->exepath, sizeof(event->exepath)); + + event->timestamp_raw = bpf_ktime_get_boot_ns(); + + gadget_submit_buf(ctx, &events, event, sizeof(*event)); + } else { + gadget_discard_buf(event); } - if (!is_randomx_mxcsr(mxcsr)) { + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; + +#endif // defined(__TARGET_ARCH_x86) + +/* // Kernel types definitions +#include + +// eBPF helpers signatures +// Check https://man7.org/linux/man-pages/man7/bpf-helpers.7.html to learn +// more about different available helpers +#include +#include + +// Inspektor Gadget buffer +#include +// Helpers to handle common data +#include +// Inspektor Gadget macros +#include +// Inspektor Gadget filtering +#include +// Inspektor Gadget types +#include +// Inspektor Gadget mntns +#include + +#include "program.h" +#include "upper_layer.h" +#include "exe_path.h" + +#if defined(__TARGET_ARCH_x86) + +#define TARGET_RANDOMX_EVENTS_COUNT 5 +// 5 seconds in nanoseconds +#define MAX_NS_BETWEEN_EVENTS 5000000000ULL + +// This struct will hold the state for each mount namespace +struct mntns_cache { + u64 timestamp; + u64 events_count; + bool alerted; +}; + +// A map to store the cache per mntns_id. +// key: mntns_id (u64), value: struct mntns_cache +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 1024); + __type(key, u64); + __type(value, struct mntns_cache); +} mntns_event_count SEC(".maps"); + +// events is the name of the buffer map and 1024 * 256 (256KB) is its size. +GADGET_TRACER_MAP(events, 1024 * 256); + +// Define a tracer +GADGET_TRACER(randomx, events, event); + +// Utilize the kernel version provided by libbpf. (kconfig must be present). +extern int LINUX_KERNEL_VERSION __kconfig; + +#if LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0) +struct old_fpu { + unsigned int last_cpu; + unsigned char initialized; + long: 24; + long: 64; + long: 64; + long: 64; + long: 64; + long: 64; + long: 64; + long: 64; + union fpregs_state state; +}; +#endif + +SEC("tracepoint/x86_fpu/x86_fpu_regs_deactivated") +int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) +{ + if (gadget_should_discard_data_current()) { return 0; } - // --- MXCSR looks like RandomX. Count events per mount namespace --- - u64 mntns_id = gadget_get_current_mntns_id(); + u64 mntns_id; + mntns_id = gadget_get_current_mntns_id(); struct mntns_cache *cache; cache = bpf_map_lookup_elem(&mntns_event_count, &mntns_id); u64 now = bpf_ktime_get_ns(); if (!cache) { + // First event for this mntns. Create a new entry. struct mntns_cache new_cache = {}; new_cache.timestamp = now; new_cache.events_count = 1; new_cache.alerted = false; bpf_map_update_elem(&mntns_event_count, &mntns_id, &new_cache, BPF_ANY); - return 0; + return 0; // Don't send an event yet } + // If we have already sent an alert for this mntns, do nothing. if (cache->alerted) { return 0; } + // Check if the last event was too long ago and reset if necessary. if (now - cache->timestamp > MAX_NS_BETWEEN_EVENTS) { cache->timestamp = now; cache->events_count = 1; bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); - return 0; + return 0; // Don't send an event yet } - + + // Increment the count. Using bpf_map_update_elem is not atomic, but for + // this use case (a single CPU tracepoint), it's safe. cache->events_count++; - cache->timestamp = now; + cache->timestamp = now; // Update timestamp with the latest event + // Check if we have seen enough events if (cache->events_count <= TARGET_RANDOMX_EVENTS_COUNT) { + // Not enough events yet, just update the map and exit. bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); return 0; } - // --- Threshold reached — emit alert once per mntns --- + // --- Threshold has been reached! --- + // We only reach this point ONCE per mntns. + + // Mark as alerted to prevent sending more events for this mntns. cache->alerted = true; bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); @@ -176,17 +302,40 @@ int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) if (!event) { return 0; } - + + // Populate the event with data. This code is the same as before. gadget_process_populate(&event->proc); - event->upper_layer = has_upper_layer(); - read_exe_path(event->exepath, sizeof(event->exepath)); - event->timestamp_raw = bpf_ktime_get_boot_ns(); - gadget_submit_buf(ctx, &events, event, sizeof(*event)); + void *fpu = BPF_CORE_READ(ctx, fpu); + if (fpu == NULL) { + gadget_discard_buf(event); + return 0; + } + + u32 mxcsr; + if(LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0)) { + bpf_probe_read_kernel(&mxcsr, sizeof(mxcsr), &((struct old_fpu*)fpu)->state.xsave.i387.mxcsr); + } else { + mxcsr = BPF_CORE_READ((struct fpu*)fpu, fpstate, regs.xsave.i387.mxcsr); + } + + int fpcr = (mxcsr & 0x6000) >> 13; + if (fpcr != 0) { + event->upper_layer = has_upper_layer(); + read_exe_path(event->exepath, sizeof(event->exepath)); + + event->timestamp_raw = bpf_ktime_get_boot_ns(); + + gadget_submit_buf(ctx, &events, event, sizeof(*event)); + } else { + gadget_discard_buf(event); + } return 0; } char LICENSE[] SEC("license") = "GPL"; -#endif // defined(__TARGET_ARCH_x86) \ No newline at end of file +#endif // defined(__TARGET_ARCH_x86) + + */ \ No newline at end of file From 87838756550ecd6d593c97c96a6efc1573a52221 Mon Sep 17 00:00:00 2001 From: entlein Date: Tue, 17 Mar 2026 13:54:56 +0100 Subject: [PATCH 17/18] randomx super simple Signed-off-by: entlein --- pkg/ebpf/gadgets/randomx/program.bpf.c | 302 +++---------------------- pkg/ebpf/gadgets/randomx/program.h | 1 + 2 files changed, 31 insertions(+), 272 deletions(-) diff --git a/pkg/ebpf/gadgets/randomx/program.bpf.c b/pkg/ebpf/gadgets/randomx/program.bpf.c index 46e7425e2..6f9455e2c 100644 --- a/pkg/ebpf/gadgets/randomx/program.bpf.c +++ b/pkg/ebpf/gadgets/randomx/program.bpf.c @@ -2,22 +2,15 @@ #include // eBPF helpers signatures -// Check https://man7.org/linux/man-pages/man7/bpf-helpers.7.html to learn -// more about different available helpers #include #include -// Inspektor Gadget buffer +// Inspektor Gadget #include -// Helpers to handle common data #include -// Inspektor Gadget macros #include -// Inspektor Gadget filtering #include -// Inspektor Gadget types #include -// Inspektor Gadget mntns #include #include "program.h" @@ -26,37 +19,27 @@ #if defined(__TARGET_ARCH_x86) -#define TARGET_RANDOMX_EVENTS_COUNT 5 -// 5 seconds in nanoseconds -#define MAX_NS_BETWEEN_EVENTS 5000000000ULL - -// This struct will hold the state for each mount namespace -struct mntns_cache { - u64 timestamp; - u64 events_count; - bool alerted; -}; - -// A map to store the cache per mntns_id. -// key: mntns_id (u64), value: struct mntns_cache -struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); - __uint(max_entries, 1024); - __type(key, u64); - __type(value, struct mntns_cache); -} mntns_event_count SEC(".maps"); - // events is the name of the buffer map and 1024 * 256 (256KB) is its size. GADGET_TRACER_MAP(events, 1024 * 256); // Define a tracer GADGET_TRACER(randomx, events, event); -// Utilize the kernel version provided by libbpf. (kconfig must be present). -extern int LINUX_KERNEL_VERSION __kconfig; - -#if LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0) -struct old_fpu { +// --- FPU struct for direct bpf_probe_read_kernel access --- +// On kernels <= 5.15 the layout is: +// struct fpu { ...; union fpregs_state state; } +// On kernels > 5.15 the layout changed to use a fpstate pointer, +// but BPF_CORE_READ through that pointer fails on some 6.x kernels. +// +// The bpftrace approach (fpu->state.xsave.i387.mxcsr) works because +// bpftrace resolves offsets from the running kernel's BTF at load time. +// We replicate that here: define a struct matching the old layout and +// use bpf_probe_read_kernel — this bypasses CO-RE relocation entirely. +// +// If the kernel has the new layout, the read will land at the wrong +// offset and we'll get a garbage mxcsr (which we emit as mxcsr_raw +// so we can diagnose). If it works, fpcr != 0 means detection fires. +struct fpu_old_layout { unsigned int last_cpu; unsigned char initialized; long: 24; @@ -69,7 +52,6 @@ struct old_fpu { long: 64; union fpregs_state state; }; -#endif SEC("tracepoint/x86_fpu/x86_fpu_regs_deactivated") int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) @@ -78,258 +60,36 @@ int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) return 0; } - u64 mntns_id; - mntns_id = gadget_get_current_mntns_id(); - struct mntns_cache *cache; - cache = bpf_map_lookup_elem(&mntns_event_count, &mntns_id); - - u64 now = bpf_ktime_get_ns(); - - if (!cache) { - // First event for this mntns. Create a new entry. - struct mntns_cache new_cache = {}; - new_cache.timestamp = now; - new_cache.events_count = 1; - new_cache.alerted = false; - bpf_map_update_elem(&mntns_event_count, &mntns_id, &new_cache, BPF_ANY); - return 0; // Don't send an event yet - } - - // If we have already sent an alert for this mntns, do nothing. - if (cache->alerted) { - return 0; - } - - // Check if the last event was too long ago and reset if necessary. - if (now - cache->timestamp > MAX_NS_BETWEEN_EVENTS) { - cache->timestamp = now; - cache->events_count = 1; - bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); - return 0; // Don't send an event yet - } - - // Increment the count. Using bpf_map_update_elem is not atomic, but for - // this use case (a single CPU tracepoint), it's safe. - cache->events_count++; - cache->timestamp = now; // Update timestamp with the latest event - - // Check if we have seen enough events - if (cache->events_count <= TARGET_RANDOMX_EVENTS_COUNT) { - // Not enough events yet, just update the map and exit. - bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); - return 0; - } - - // --- Threshold has been reached! --- - // We only reach this point ONCE per mntns. - - // Mark as alerted to prevent sending more events for this mntns. - cache->alerted = true; - bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); - - struct event *event; - event = gadget_reserve_buf(&events, sizeof(*event)); - if (!event) { - return 0; - } - - // Populate the event with data. This code is the same as before. - gadget_process_populate(&event->proc); - void *fpu = BPF_CORE_READ(ctx, fpu); if (fpu == NULL) { - gadget_discard_buf(event); - return 0; - } - - u32 mxcsr; - if(LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0)) { - bpf_probe_read_kernel(&mxcsr, sizeof(mxcsr), &((struct old_fpu*)fpu)->state.xsave.i387.mxcsr); - } else { - mxcsr = BPF_CORE_READ((struct fpu*)fpu, fpstate, regs.xsave.i387.mxcsr); - } - - int fpcr = (mxcsr & 0x6000) >> 13; - if (fpcr != 0) { - event->upper_layer = has_upper_layer(); - read_exe_path(event->exepath, sizeof(event->exepath)); - - event->timestamp_raw = bpf_ktime_get_boot_ns(); - - gadget_submit_buf(ctx, &events, event, sizeof(*event)); - } else { - gadget_discard_buf(event); - } - - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; - -#endif // defined(__TARGET_ARCH_x86) - -/* // Kernel types definitions -#include - -// eBPF helpers signatures -// Check https://man7.org/linux/man-pages/man7/bpf-helpers.7.html to learn -// more about different available helpers -#include -#include - -// Inspektor Gadget buffer -#include -// Helpers to handle common data -#include -// Inspektor Gadget macros -#include -// Inspektor Gadget filtering -#include -// Inspektor Gadget types -#include -// Inspektor Gadget mntns -#include - -#include "program.h" -#include "upper_layer.h" -#include "exe_path.h" - -#if defined(__TARGET_ARCH_x86) - -#define TARGET_RANDOMX_EVENTS_COUNT 5 -// 5 seconds in nanoseconds -#define MAX_NS_BETWEEN_EVENTS 5000000000ULL - -// This struct will hold the state for each mount namespace -struct mntns_cache { - u64 timestamp; - u64 events_count; - bool alerted; -}; - -// A map to store the cache per mntns_id. -// key: mntns_id (u64), value: struct mntns_cache -struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); - __uint(max_entries, 1024); - __type(key, u64); - __type(value, struct mntns_cache); -} mntns_event_count SEC(".maps"); - -// events is the name of the buffer map and 1024 * 256 (256KB) is its size. -GADGET_TRACER_MAP(events, 1024 * 256); - -// Define a tracer -GADGET_TRACER(randomx, events, event); - -// Utilize the kernel version provided by libbpf. (kconfig must be present). -extern int LINUX_KERNEL_VERSION __kconfig; - -#if LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0) -struct old_fpu { - unsigned int last_cpu; - unsigned char initialized; - long: 24; - long: 64; - long: 64; - long: 64; - long: 64; - long: 64; - long: 64; - long: 64; - union fpregs_state state; -}; -#endif - -SEC("tracepoint/x86_fpu/x86_fpu_regs_deactivated") -int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) -{ - if (gadget_should_discard_data_current()) { - return 0; - } - - u64 mntns_id; - mntns_id = gadget_get_current_mntns_id(); - struct mntns_cache *cache; - cache = bpf_map_lookup_elem(&mntns_event_count, &mntns_id); - - u64 now = bpf_ktime_get_ns(); - - if (!cache) { - // First event for this mntns. Create a new entry. - struct mntns_cache new_cache = {}; - new_cache.timestamp = now; - new_cache.events_count = 1; - new_cache.alerted = false; - bpf_map_update_elem(&mntns_event_count, &mntns_id, &new_cache, BPF_ANY); - return 0; // Don't send an event yet - } - - // If we have already sent an alert for this mntns, do nothing. - if (cache->alerted) { return 0; } - // Check if the last event was too long ago and reset if necessary. - if (now - cache->timestamp > MAX_NS_BETWEEN_EVENTS) { - cache->timestamp = now; - cache->events_count = 1; - bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); - return 0; // Don't send an event yet - } - - // Increment the count. Using bpf_map_update_elem is not atomic, but for - // this use case (a single CPU tracepoint), it's safe. - cache->events_count++; - cache->timestamp = now; // Update timestamp with the latest event + // Read MXCSR the old way: fpu->state.xsave.i387.mxcsr + // This is the bpftrace approach that works on kernel 6.1. + u32 mxcsr = 0; + bpf_probe_read_kernel(&mxcsr, sizeof(mxcsr), + &((struct fpu_old_layout *)fpu)->state.xsave.i387.mxcsr); - // Check if we have seen enough events - if (cache->events_count <= TARGET_RANDOMX_EVENTS_COUNT) { - // Not enough events yet, just update the map and exit. - bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); + int fpcr = (mxcsr & 0x6000) >> 13; + if (fpcr == 0) { return 0; } - // --- Threshold has been reached! --- - // We only reach this point ONCE per mntns. - - // Mark as alerted to prevent sending more events for this mntns. - cache->alerted = true; - bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); - + // Detection triggered — emit event struct event *event; event = gadget_reserve_buf(&events, sizeof(*event)); if (!event) { return 0; } - - // Populate the event with data. This code is the same as before. - gadget_process_populate(&event->proc); - - void *fpu = BPF_CORE_READ(ctx, fpu); - if (fpu == NULL) { - gadget_discard_buf(event); - return 0; - } - - u32 mxcsr; - if(LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0)) { - bpf_probe_read_kernel(&mxcsr, sizeof(mxcsr), &((struct old_fpu*)fpu)->state.xsave.i387.mxcsr); - } else { - mxcsr = BPF_CORE_READ((struct fpu*)fpu, fpstate, regs.xsave.i387.mxcsr); - } - - int fpcr = (mxcsr & 0x6000) >> 13; - if (fpcr != 0) { - event->upper_layer = has_upper_layer(); - read_exe_path(event->exepath, sizeof(event->exepath)); - event->timestamp_raw = bpf_ktime_get_boot_ns(); + gadget_process_populate(&event->proc); + event->upper_layer = has_upper_layer(); + event->mxcsr_raw = mxcsr; + read_exe_path(event->exepath, sizeof(event->exepath)); + event->timestamp_raw = bpf_ktime_get_boot_ns(); - gadget_submit_buf(ctx, &events, event, sizeof(*event)); - } else { - gadget_discard_buf(event); - } + gadget_submit_buf(ctx, &events, event, sizeof(*event)); return 0; } @@ -337,5 +97,3 @@ int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) char LICENSE[] SEC("license") = "GPL"; #endif // defined(__TARGET_ARCH_x86) - - */ \ No newline at end of file diff --git a/pkg/ebpf/gadgets/randomx/program.h b/pkg/ebpf/gadgets/randomx/program.h index ca9665869..626dd1189 100644 --- a/pkg/ebpf/gadgets/randomx/program.h +++ b/pkg/ebpf/gadgets/randomx/program.h @@ -6,5 +6,6 @@ struct event { gadget_timestamp timestamp_raw; struct gadget_process proc; bool upper_layer; + __u32 mxcsr_raw; char exepath[GADGET_PATH_MAX]; }; From 4bcd364e3b412b8a1b397018e8b0dda8ad1fd5cc Mon Sep 17 00:00:00 2001 From: entlein Date: Tue, 17 Mar 2026 16:51:32 +0100 Subject: [PATCH 18/18] randomx revert again back , didnt help Signed-off-by: entlein --- pkg/ebpf/gadgets/randomx/program.bpf.c | 302 ++++++++++++++++++++++--- pkg/ebpf/gadgets/randomx/program.h | 1 - 2 files changed, 272 insertions(+), 31 deletions(-) diff --git a/pkg/ebpf/gadgets/randomx/program.bpf.c b/pkg/ebpf/gadgets/randomx/program.bpf.c index 6f9455e2c..46e7425e2 100644 --- a/pkg/ebpf/gadgets/randomx/program.bpf.c +++ b/pkg/ebpf/gadgets/randomx/program.bpf.c @@ -2,15 +2,22 @@ #include // eBPF helpers signatures +// Check https://man7.org/linux/man-pages/man7/bpf-helpers.7.html to learn +// more about different available helpers #include #include -// Inspektor Gadget +// Inspektor Gadget buffer #include +// Helpers to handle common data #include +// Inspektor Gadget macros #include +// Inspektor Gadget filtering #include +// Inspektor Gadget types #include +// Inspektor Gadget mntns #include #include "program.h" @@ -19,27 +26,37 @@ #if defined(__TARGET_ARCH_x86) +#define TARGET_RANDOMX_EVENTS_COUNT 5 +// 5 seconds in nanoseconds +#define MAX_NS_BETWEEN_EVENTS 5000000000ULL + +// This struct will hold the state for each mount namespace +struct mntns_cache { + u64 timestamp; + u64 events_count; + bool alerted; +}; + +// A map to store the cache per mntns_id. +// key: mntns_id (u64), value: struct mntns_cache +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 1024); + __type(key, u64); + __type(value, struct mntns_cache); +} mntns_event_count SEC(".maps"); + // events is the name of the buffer map and 1024 * 256 (256KB) is its size. GADGET_TRACER_MAP(events, 1024 * 256); // Define a tracer GADGET_TRACER(randomx, events, event); -// --- FPU struct for direct bpf_probe_read_kernel access --- -// On kernels <= 5.15 the layout is: -// struct fpu { ...; union fpregs_state state; } -// On kernels > 5.15 the layout changed to use a fpstate pointer, -// but BPF_CORE_READ through that pointer fails on some 6.x kernels. -// -// The bpftrace approach (fpu->state.xsave.i387.mxcsr) works because -// bpftrace resolves offsets from the running kernel's BTF at load time. -// We replicate that here: define a struct matching the old layout and -// use bpf_probe_read_kernel — this bypasses CO-RE relocation entirely. -// -// If the kernel has the new layout, the read will land at the wrong -// offset and we'll get a garbage mxcsr (which we emit as mxcsr_raw -// so we can diagnose). If it works, fpcr != 0 means detection fires. -struct fpu_old_layout { +// Utilize the kernel version provided by libbpf. (kconfig must be present). +extern int LINUX_KERNEL_VERSION __kconfig; + +#if LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0) +struct old_fpu { unsigned int last_cpu; unsigned char initialized; long: 24; @@ -52,6 +69,7 @@ struct fpu_old_layout { long: 64; union fpregs_state state; }; +#endif SEC("tracepoint/x86_fpu/x86_fpu_regs_deactivated") int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) @@ -60,36 +78,258 @@ int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) return 0; } + u64 mntns_id; + mntns_id = gadget_get_current_mntns_id(); + struct mntns_cache *cache; + cache = bpf_map_lookup_elem(&mntns_event_count, &mntns_id); + + u64 now = bpf_ktime_get_ns(); + + if (!cache) { + // First event for this mntns. Create a new entry. + struct mntns_cache new_cache = {}; + new_cache.timestamp = now; + new_cache.events_count = 1; + new_cache.alerted = false; + bpf_map_update_elem(&mntns_event_count, &mntns_id, &new_cache, BPF_ANY); + return 0; // Don't send an event yet + } + + // If we have already sent an alert for this mntns, do nothing. + if (cache->alerted) { + return 0; + } + + // Check if the last event was too long ago and reset if necessary. + if (now - cache->timestamp > MAX_NS_BETWEEN_EVENTS) { + cache->timestamp = now; + cache->events_count = 1; + bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); + return 0; // Don't send an event yet + } + + // Increment the count. Using bpf_map_update_elem is not atomic, but for + // this use case (a single CPU tracepoint), it's safe. + cache->events_count++; + cache->timestamp = now; // Update timestamp with the latest event + + // Check if we have seen enough events + if (cache->events_count <= TARGET_RANDOMX_EVENTS_COUNT) { + // Not enough events yet, just update the map and exit. + bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); + return 0; + } + + // --- Threshold has been reached! --- + // We only reach this point ONCE per mntns. + + // Mark as alerted to prevent sending more events for this mntns. + cache->alerted = true; + bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); + + struct event *event; + event = gadget_reserve_buf(&events, sizeof(*event)); + if (!event) { + return 0; + } + + // Populate the event with data. This code is the same as before. + gadget_process_populate(&event->proc); + void *fpu = BPF_CORE_READ(ctx, fpu); if (fpu == NULL) { + gadget_discard_buf(event); return 0; } - // Read MXCSR the old way: fpu->state.xsave.i387.mxcsr - // This is the bpftrace approach that works on kernel 6.1. - u32 mxcsr = 0; - bpf_probe_read_kernel(&mxcsr, sizeof(mxcsr), - &((struct fpu_old_layout *)fpu)->state.xsave.i387.mxcsr); - + u32 mxcsr; + if(LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0)) { + bpf_probe_read_kernel(&mxcsr, sizeof(mxcsr), &((struct old_fpu*)fpu)->state.xsave.i387.mxcsr); + } else { + mxcsr = BPF_CORE_READ((struct fpu*)fpu, fpstate, regs.xsave.i387.mxcsr); + } + int fpcr = (mxcsr & 0x6000) >> 13; - if (fpcr == 0) { + if (fpcr != 0) { + event->upper_layer = has_upper_layer(); + read_exe_path(event->exepath, sizeof(event->exepath)); + + event->timestamp_raw = bpf_ktime_get_boot_ns(); + + gadget_submit_buf(ctx, &events, event, sizeof(*event)); + } else { + gadget_discard_buf(event); + } + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; + +#endif // defined(__TARGET_ARCH_x86) + +/* // Kernel types definitions +#include + +// eBPF helpers signatures +// Check https://man7.org/linux/man-pages/man7/bpf-helpers.7.html to learn +// more about different available helpers +#include +#include + +// Inspektor Gadget buffer +#include +// Helpers to handle common data +#include +// Inspektor Gadget macros +#include +// Inspektor Gadget filtering +#include +// Inspektor Gadget types +#include +// Inspektor Gadget mntns +#include + +#include "program.h" +#include "upper_layer.h" +#include "exe_path.h" + +#if defined(__TARGET_ARCH_x86) + +#define TARGET_RANDOMX_EVENTS_COUNT 5 +// 5 seconds in nanoseconds +#define MAX_NS_BETWEEN_EVENTS 5000000000ULL + +// This struct will hold the state for each mount namespace +struct mntns_cache { + u64 timestamp; + u64 events_count; + bool alerted; +}; + +// A map to store the cache per mntns_id. +// key: mntns_id (u64), value: struct mntns_cache +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 1024); + __type(key, u64); + __type(value, struct mntns_cache); +} mntns_event_count SEC(".maps"); + +// events is the name of the buffer map and 1024 * 256 (256KB) is its size. +GADGET_TRACER_MAP(events, 1024 * 256); + +// Define a tracer +GADGET_TRACER(randomx, events, event); + +// Utilize the kernel version provided by libbpf. (kconfig must be present). +extern int LINUX_KERNEL_VERSION __kconfig; + +#if LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0) +struct old_fpu { + unsigned int last_cpu; + unsigned char initialized; + long: 24; + long: 64; + long: 64; + long: 64; + long: 64; + long: 64; + long: 64; + long: 64; + union fpregs_state state; +}; +#endif + +SEC("tracepoint/x86_fpu/x86_fpu_regs_deactivated") +int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) +{ + if (gadget_should_discard_data_current()) { + return 0; + } + + u64 mntns_id; + mntns_id = gadget_get_current_mntns_id(); + struct mntns_cache *cache; + cache = bpf_map_lookup_elem(&mntns_event_count, &mntns_id); + + u64 now = bpf_ktime_get_ns(); + + if (!cache) { + // First event for this mntns. Create a new entry. + struct mntns_cache new_cache = {}; + new_cache.timestamp = now; + new_cache.events_count = 1; + new_cache.alerted = false; + bpf_map_update_elem(&mntns_event_count, &mntns_id, &new_cache, BPF_ANY); + return 0; // Don't send an event yet + } + + // If we have already sent an alert for this mntns, do nothing. + if (cache->alerted) { + return 0; + } + + // Check if the last event was too long ago and reset if necessary. + if (now - cache->timestamp > MAX_NS_BETWEEN_EVENTS) { + cache->timestamp = now; + cache->events_count = 1; + bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); + return 0; // Don't send an event yet + } + + // Increment the count. Using bpf_map_update_elem is not atomic, but for + // this use case (a single CPU tracepoint), it's safe. + cache->events_count++; + cache->timestamp = now; // Update timestamp with the latest event + + // Check if we have seen enough events + if (cache->events_count <= TARGET_RANDOMX_EVENTS_COUNT) { + // Not enough events yet, just update the map and exit. + bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); return 0; } - // Detection triggered — emit event + // --- Threshold has been reached! --- + // We only reach this point ONCE per mntns. + + // Mark as alerted to prevent sending more events for this mntns. + cache->alerted = true; + bpf_map_update_elem(&mntns_event_count, &mntns_id, cache, BPF_ANY); + struct event *event; event = gadget_reserve_buf(&events, sizeof(*event)); if (!event) { return 0; } - + + // Populate the event with data. This code is the same as before. gadget_process_populate(&event->proc); - event->upper_layer = has_upper_layer(); - event->mxcsr_raw = mxcsr; - read_exe_path(event->exepath, sizeof(event->exepath)); - event->timestamp_raw = bpf_ktime_get_boot_ns(); - gadget_submit_buf(ctx, &events, event, sizeof(*event)); + void *fpu = BPF_CORE_READ(ctx, fpu); + if (fpu == NULL) { + gadget_discard_buf(event); + return 0; + } + + u32 mxcsr; + if(LINUX_KERNEL_VERSION <= KERNEL_VERSION(5, 15, 0)) { + bpf_probe_read_kernel(&mxcsr, sizeof(mxcsr), &((struct old_fpu*)fpu)->state.xsave.i387.mxcsr); + } else { + mxcsr = BPF_CORE_READ((struct fpu*)fpu, fpstate, regs.xsave.i387.mxcsr); + } + + int fpcr = (mxcsr & 0x6000) >> 13; + if (fpcr != 0) { + event->upper_layer = has_upper_layer(); + read_exe_path(event->exepath, sizeof(event->exepath)); + + event->timestamp_raw = bpf_ktime_get_boot_ns(); + + gadget_submit_buf(ctx, &events, event, sizeof(*event)); + } else { + gadget_discard_buf(event); + } return 0; } @@ -97,3 +337,5 @@ int tracepoint__x86_fpu_regs_deactivated(struct trace_event_raw_x86_fpu *ctx) char LICENSE[] SEC("license") = "GPL"; #endif // defined(__TARGET_ARCH_x86) + + */ \ No newline at end of file diff --git a/pkg/ebpf/gadgets/randomx/program.h b/pkg/ebpf/gadgets/randomx/program.h index 626dd1189..ca9665869 100644 --- a/pkg/ebpf/gadgets/randomx/program.h +++ b/pkg/ebpf/gadgets/randomx/program.h @@ -6,6 +6,5 @@ struct event { gadget_timestamp timestamp_raw; struct gadget_process proc; bool upper_layer; - __u32 mxcsr_raw; char exepath[GADGET_PATH_MAX]; };