From e21def257344655a8dd3b71670c7c2b451b06009 Mon Sep 17 00:00:00 2001 From: Nicholas Wiersma Date: Mon, 13 Apr 2026 11:39:16 +0200 Subject: [PATCH 1/9] feat: add allocations tracking and metric Signed-off-by: Nicholas Wiersma --- pkg/apis/agones/v1/fleet.go | 2 + .../agones/v1/fleetstatus.go | 9 ++ pkg/fleets/controller.go | 92 +++++++++++++++++++ pkg/fleets/controller_test.go | 31 +++++++ pkg/metrics/controller.go | 3 + pkg/metrics/controller_metrics.go | 45 +++++---- pkg/metrics/controller_test.go | 38 ++++++++ pkg/metrics/exporter_test.go | 14 +++ 8 files changed, 216 insertions(+), 18 deletions(-) diff --git a/pkg/apis/agones/v1/fleet.go b/pkg/apis/agones/v1/fleet.go index 7cc9737392..58e38148c4 100644 --- a/pkg/apis/agones/v1/fleet.go +++ b/pkg/apis/agones/v1/fleet.go @@ -96,6 +96,8 @@ type FleetStatus struct { ReservedReplicas int32 `json:"reservedReplicas"` // AllocatedReplicas are the number of Allocated GameServer replicas AllocatedReplicas int32 `json:"allocatedReplicas"` + // Allocations is a counter of the number of allocations observed. 
+ Allocations int64 `json:"allocations"` // [Stage:Alpha] // [FeatureFlag:PlayerTracking] // Players are the current total player capacity and count for this Fleet diff --git a/pkg/client/applyconfiguration/agones/v1/fleetstatus.go b/pkg/client/applyconfiguration/agones/v1/fleetstatus.go index 89138b18f3..2c58079c83 100644 --- a/pkg/client/applyconfiguration/agones/v1/fleetstatus.go +++ b/pkg/client/applyconfiguration/agones/v1/fleetstatus.go @@ -25,6 +25,7 @@ type FleetStatusApplyConfiguration struct { ReadyReplicas *int32 `json:"readyReplicas,omitempty"` ReservedReplicas *int32 `json:"reservedReplicas,omitempty"` AllocatedReplicas *int32 `json:"allocatedReplicas,omitempty"` + Allocations *int64 `json:"allocations,omitempty"` Players *AggregatedPlayerStatusApplyConfiguration `json:"players,omitempty"` Counters map[string]AggregatedCounterStatusApplyConfiguration `json:"counters,omitempty"` Lists map[string]AggregatedListStatusApplyConfiguration `json:"lists,omitempty"` @@ -68,6 +69,14 @@ func (b *FleetStatusApplyConfiguration) WithAllocatedReplicas(value int32) *Flee return b } +// WithAllocations sets the Allocations field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Allocations field is set to the value of the last call. +func (b *FleetStatusApplyConfiguration) WithAllocations(value int64) *FleetStatusApplyConfiguration { + b.Allocations = &value + return b +} + // WithPlayers sets the Players field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Players field is set to the value of the last call. 
diff --git a/pkg/fleets/controller.go b/pkg/fleets/controller.go
index e3d0afb2b7..ab14a2bc5b 100644
--- a/pkg/fleets/controller.go
+++ b/pkg/fleets/controller.go
@@ -18,6 +18,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"sync"
 	"time"
 
 	"agones.dev/agones/pkg/apis/agones"
@@ -71,6 +72,8 @@ type Controller struct {
 	fleetLister         listerv1.FleetLister
 	fleetSynced         cache.InformerSynced
 	workerqueue         *workerqueue.WorkerQueue
+	allocsMu            sync.Mutex
+	allocs              map[string]int64
 	recorder            record.EventRecorder
 }
 
@@ -82,6 +85,9 @@ func NewController(
 	agonesClient versioned.Interface,
 	agonesInformerFactory externalversions.SharedInformerFactory) *Controller {
 
+	gameServers := agonesInformerFactory.Agones().V1().GameServers()
+	gsInformer := gameServers.Informer()
+
 	gameServerSets := agonesInformerFactory.Agones().V1().GameServerSets()
 	gsSetInformer := gameServerSets.Informer()
 
@@ -96,6 +102,7 @@ func NewController(
 		fleetGetter:         agonesClient.AgonesV1(),
 		fleetLister:         fleets.Lister(),
 		fleetSynced:         fInformer.HasSynced,
+		allocs:              map[string]int64{},
 	}
 
 	c.baseLogger = runtime.NewLoggerWithType(c)
@@ -112,6 +119,21 @@ func NewController(
 		UpdateFunc: func(_, newObj interface{}) {
 			c.workerqueue.Enqueue(newObj)
 		},
+		DeleteFunc: func(obj interface{}) {
+			fleet, ok := obj.(*agonesv1.Fleet)
+			if !ok {
+				// A delete the watch missed arrives as a tombstone wrapping the last known state.
+				tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
+				if !isTombstone {
+					return
+				}
+				if fleet, ok = tombstone.Obj.(*agonesv1.Fleet); !ok {
+					return
+				}
+			}
+
+			c.removeAllocations(fleet.ObjectMeta.Namespace, fleet.ObjectMeta.Name)
+		},
 	})
 
 	_, _ = gsSetInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
@@ -125,6 +147,25 @@ func NewController(
 		},
 	})
 
+	_, _ = gsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
+		UpdateFunc: func(oldObj, newObj interface{}) {
+			oldGs := oldObj.(*agonesv1.GameServer)
+			newGs := newObj.(*agonesv1.GameServer)
+
+			if oldGs.Status.State == agonesv1.GameServerStateAllocated || newGs.Status.State != agonesv1.GameServerStateAllocated {
+				// Count only the transition of a GameServer into the Allocated state.
+				return
+			}
+			fleet, ok := newGs.Labels[agonesv1.FleetNameLabel]
+			if !ok || fleet == "" {
+				// The game server is not attached to a fleet. Nothing to do.
+				return
+			}
+
+			c.incAllocations(newGs.Namespace, fleet)
+		},
+	})
+
 	return c
 }
 
@@ -226,6 +267,8 @@ func (c *Controller) Run(ctx context.Context, workers int) error {
 	}
 
 	c.workerqueue.Run(ctx, workers)
+
+	c.flushAllocations()
 	return nil
 }
 
@@ -723,6 +766,14 @@ func (c *Controller) updateFleetStatus(ctx context.Context, fleet *agonesv1.Flee
 		}
 	}
 
+	// Drain the allocations observed since the last status update that persisted them.
+	allocs := c.getAllocations(fCopy.ObjectMeta.Namespace, fCopy.ObjectMeta.Name)
+	fCopy.Status.Allocations += allocs
+
 	_, err = c.fleetGetter.Fleets(fCopy.ObjectMeta.Namespace).UpdateStatus(ctx, fCopy, metav1.UpdateOptions{})
+	if err != nil {
+		// The drained count was not persisted; put it back so the next sync retries it.
+		c.addAllocations(fCopy.ObjectMeta.Namespace, fCopy.ObjectMeta.Name, allocs)
+	}
 	return errors.Wrapf(err, "error updating status of fleet %s", fCopy.ObjectMeta.Name)
 }
@@ -760,6 +811,86 @@ func (c *Controller) filterGameServerSetByActive(fleet *agonesv1.Fleet, list []*
 	return active, rest
 }
 
+// getAllocations returns the allocations recorded for the fleet since the
+// previous call and clears the pending count.
+func (c *Controller) getAllocations(ns, fleetName string) (allocs int64) {
+	key := cache.ObjectName{Namespace: ns, Name: fleetName}.String()
+
+	c.allocsMu.Lock()
+	defer c.allocsMu.Unlock()
+
+	allocs = c.allocs[key]
+	// Drop the entry rather than zeroing it so deleted fleets do not linger in the map.
+	delete(c.allocs, key)
+	return allocs
+}
+
+// incAllocations records one allocation against the fleet.
+func (c *Controller) incAllocations(ns, fleetName string) {
+	c.addAllocations(ns, fleetName, 1)
+}
+
+// addAllocations adds n pending allocations to the fleet. It is also used to
+// restore a drained count when persisting it failed; n <= 0 is a no-op.
+func (c *Controller) addAllocations(ns, fleetName string, n int64) {
+	if n <= 0 {
+		return
+	}
+	key := cache.ObjectName{Namespace: ns, Name: fleetName}.String()
+
+	c.allocsMu.Lock()
+	defer c.allocsMu.Unlock()
+
+	c.allocs[key] += n
+}
+
+// removeAllocations discards any pending allocations for the fleet.
+func (c *Controller) removeAllocations(ns, fleetName string) {
+	key := cache.ObjectName{Namespace: ns, Name: fleetName}.String()
+
+	c.allocsMu.Lock()
+	defer c.allocsMu.Unlock()
+
+	delete(c.allocs, key)
+}
+
+// flushAllocations makes a best-effort attempt to persist every pending
+// allocation count to its fleet; Run calls it after the work queue returns.
+func (c *Controller) flushAllocations() {
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	// Take ownership of the pending counts so the mutex is not held across API
+	// calls, which would stall the informer handlers calling incAllocations.
+	c.allocsMu.Lock()
+	pending := c.allocs
+	c.allocs = map[string]int64{}
+	c.allocsMu.Unlock()
+
+	for key, allocs := range pending {
+		if allocs == 0 {
+			continue
+		}
+
+		ns, name, err := cache.SplitMetaNamespaceKey(key)
+		if err != nil {
+			c.baseLogger.WithError(err).WithField("fleet", key).Warn("could not parse fleet key, dropping allocations")
+			continue
+		}
+		fleet, err := c.fleetGetter.Fleets(ns).Get(ctx, name, metav1.GetOptions{})
+		if err != nil {
+			c.baseLogger.WithError(err).WithField("fleet", key).Warn("could not get fleet, dropping allocations")
+			continue
+		}
+
+		fleet.Status.Allocations += allocs
+
+		if _, err := c.fleetGetter.Fleets(ns).UpdateStatus(ctx, fleet, metav1.UpdateOptions{}); err != nil {
+			c.baseLogger.WithError(err).WithField("fleet", key).Warn("could not update fleet status, dropping allocations")
+		}
+	}
+}
+
 // mergeCounters adds the contents of AggregatedCounterStatus c2 into c1.
 func mergeCounters(c1, c2 map[string]agonesv1.AggregatedCounterStatus) map[string]agonesv1.AggregatedCounterStatus {
 	if c1 == nil {
diff --git a/pkg/fleets/controller_test.go b/pkg/fleets/controller_test.go
index 378e52039e..581dbf3fa7 100644
--- a/pkg/fleets/controller_test.go
+++ b/pkg/fleets/controller_test.go
@@ -310,6 +310,37 @@ func TestControllerSyncFleet(t *testing.T) {
 		agtesting.AssertNoEvent(t, m.FakeRecorder.Events)
 	})
 
+	t.Run("fleets update allocation counter", func(t *testing.T) {
+		f := defaultFixture()
+		f.Spec.Strategy.Type = appsv1.RollingUpdateDeploymentStrategyType
+
+		c, m := newFakeController()
+		gsSet := f.GameServerSet()
+		gsSet.ObjectMeta.Name = "gsSet1"
+		gsSet.ObjectMeta.UID = "4321"
+		gsSet.Spec.Replicas = f.Spec.Replicas
+
+		m.AgonesClient.AddReactor("list", "fleets", func(_ k8stesting.Action) (bool, runtime.Object, error) {
+			return true, &agonesv1.FleetList{Items: []agonesv1.Fleet{*f}}, nil
+		})
+		m.AgonesClient.AddReactor("get", "fleets", func(_ k8stesting.Action) (bool, runtime.Object, error) {
+			return true, f, nil
+		})
+
+		m.AgonesClient.AddReactor("list", "gameserversets", func(_ k8stesting.Action) (bool, runtime.Object, error) {
+			return true, &agonesv1.GameServerSetList{Items: []agonesv1.GameServerSet{*gsSet}}, nil
+		})
+
+		ctx, cancel := agtesting.StartInformers(m, c.fleetSynced, c.gameServerSetSynced)
+		defer cancel()
+
+		c.incAllocations("default", "fleet-1")
+
+		err := c.syncFleet(ctx, "default/fleet-1")
+		assert.Nil(t, err)
+		assert.Equal(t, int64(1), f.Status.Allocations)
+	})
+
 	t.Run("error on getting fleet", func(t *testing.T) {
 		c, _ := newFakeController()
 		c.fleetLister = &fakeFleetListerWithErr{}
diff --git
a/pkg/metrics/controller.go b/pkg/metrics/controller.go index 460902b5f2..f0298b3d33 100644 --- a/pkg/metrics/controller.go +++ b/pkg/metrics/controller.go @@ -456,6 +456,9 @@ func (c *Controller) recordGameServerStatusChanges(old, next interface{}) { if newGs.Status.State != oldGs.Status.State { RecordWithTags(context.Background(), []tag.Mutator{tag.Upsert(keyType, string(newGs.Status.State)), tag.Upsert(keyFleetName, fleetName), tag.Upsert(keyNamespace, newGs.GetNamespace())}, gameServerTotalStats.M(1)) + if newGs.Status.State == agonesv1.GameServerStateAllocated { + RecordWithTags(context.Background(), []tag.Mutator{tag.Upsert(keyFleetName, fleetName), tag.Upsert(keyNamespace, newGs.GetNamespace())}, gameServerAllocationsTotalStats.M(1)) + } // Calculate the duration of the current state duration, err := c.calcDuration(oldGs, newGs) diff --git a/pkg/metrics/controller_metrics.go b/pkg/metrics/controller_metrics.go index 22bf46defc..d905ee5de9 100644 --- a/pkg/metrics/controller_metrics.go +++ b/pkg/metrics/controller_metrics.go @@ -33,6 +33,7 @@ const ( fleetListsName = "fleet_lists" gameServersCountName = "gameservers_count" gameServersTotalName = "gameservers_total" + gameServersAllocationsTotalName = "gameservers_allocations_total" gameServersPlayerConnectedTotalName = "gameserver_player_connected_total" gameServersPlayerCapacityTotalName = "gameserver_player_capacity_total" nodeCountName = "nodes_count" @@ -47,24 +48,25 @@ var ( // fleetViews are metric views associated with Fleets fleetViews = append([]string{fleetRolloutPercent, fleetReplicaCountName, gameServersCountName, gameServersTotalName, gameServersPlayerConnectedTotalName, gameServersPlayerCapacityTotalName, gameServerStateDurationName, fleetCountersName, fleetListsName}, fleetAutoscalerViews...) 
- stateDurationSeconds = []float64{0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384} - fleetRolloutPercentStats = stats.Int64("fleets/rollout_percent", "The current fleet rollout percentage", "1") - fleetsReplicasCountStats = stats.Int64("fleets/replicas_count", "The count of replicas per fleet", "1") - fasBufferLimitsCountStats = stats.Int64("fas/buffer_limits", "The buffer limits of autoscalers", "1") - fasBufferSizeStats = stats.Int64("fas/buffer_size", "The buffer size value of autoscalers", "1") - fasCurrentReplicasStats = stats.Int64("fas/current_replicas_count", "The current replicas cout as seen by autoscalers", "1") - fasDesiredReplicasStats = stats.Int64("fas/desired_replicas_count", "The desired replicas cout as seen by autoscalers", "1") - fasAbleToScaleStats = stats.Int64("fas/able_to_scale", "The fleet autoscaler can access the fleet to scale (0 indicates false, 1 indicates true)", "1") - fasLimitedStats = stats.Int64("fas/limited", "The fleet autoscaler is capped (0 indicates false, 1 indicates true)", "1") - fleetCountersStats = stats.Int64("fleets/counters", "Aggregated Counters counts and capacity across GameServers in the Fleet", "1") - fleetListsStats = stats.Int64("fleets/lists", "Aggregated Lists counts and capacity across GameServers in the Fleet", "1") - gameServerCountStats = stats.Int64("gameservers/count", "The count of gameservers", "1") - gameServerTotalStats = stats.Int64("gameservers/total", "The total of gameservers", "1") - gameServerPlayerConnectedTotal = stats.Int64("gameservers/player_connected", "The total number of players connected to gameservers", "1") - gameServerPlayerCapacityTotal = stats.Int64("gameservers/player_capacity", "The available player capacity for gameservers", "1") - nodesCountStats = stats.Int64("nodes/count", "The count of nodes in the cluster", "1") - gsPerNodesCountStats = stats.Int64("gameservers_node/count", "The count of gameservers per node in the cluster", "1") - 
gsStateDurationSec = stats.Float64("gameservers_state/duration", "The duration of gameservers to be in a particular state", stats.UnitSeconds) + stateDurationSeconds = []float64{0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384} + fleetRolloutPercentStats = stats.Int64("fleets/rollout_percent", "The current fleet rollout percentage", "1") + fleetsReplicasCountStats = stats.Int64("fleets/replicas_count", "The count of replicas per fleet", "1") + fasBufferLimitsCountStats = stats.Int64("fas/buffer_limits", "The buffer limits of autoscalers", "1") + fasBufferSizeStats = stats.Int64("fas/buffer_size", "The buffer size value of autoscalers", "1") + fasCurrentReplicasStats = stats.Int64("fas/current_replicas_count", "The current replicas cout as seen by autoscalers", "1") + fasDesiredReplicasStats = stats.Int64("fas/desired_replicas_count", "The desired replicas cout as seen by autoscalers", "1") + fasAbleToScaleStats = stats.Int64("fas/able_to_scale", "The fleet autoscaler can access the fleet to scale (0 indicates false, 1 indicates true)", "1") + fasLimitedStats = stats.Int64("fas/limited", "The fleet autoscaler is capped (0 indicates false, 1 indicates true)", "1") + fleetCountersStats = stats.Int64("fleets/counters", "Aggregated Counters counts and capacity across GameServers in the Fleet", "1") + fleetListsStats = stats.Int64("fleets/lists", "Aggregated Lists counts and capacity across GameServers in the Fleet", "1") + gameServerCountStats = stats.Int64("gameservers/count", "The count of gameservers", "1") + gameServerTotalStats = stats.Int64("gameservers/total", "The total of gameservers", "1") + gameServerAllocationsTotalStats = stats.Int64("gameservers/allocations_total", "The total of gameserver allocations", "1") + gameServerPlayerConnectedTotal = stats.Int64("gameservers/player_connected", "The total number of players connected to gameservers", "1") + gameServerPlayerCapacityTotal = stats.Int64("gameservers/player_capacity", "The 
available player capacity for gameservers", "1") + nodesCountStats = stats.Int64("nodes/count", "The count of nodes in the cluster", "1") + gsPerNodesCountStats = stats.Int64("gameservers_node/count", "The count of gameservers per node in the cluster", "1") + gsStateDurationSec = stats.Float64("gameservers_state/duration", "The duration of gameservers to be in a particular state", stats.UnitSeconds) stateViews = []*view.View{ { @@ -151,6 +153,13 @@ var ( Aggregation: view.Count(), TagKeys: []tag.Key{keyType, keyFleetName, keyNamespace}, }, + { + Name: gameServersAllocationsTotalName, + Measure: gameServerAllocationsTotalStats, + Description: "The total of gameserver allocations", + Aggregation: view.Count(), + TagKeys: []tag.Key{keyFleetName, keyNamespace}, + }, { Name: gameServersPlayerConnectedTotalName, Measure: gameServerPlayerConnectedTotal, diff --git a/pkg/metrics/controller_test.go b/pkg/metrics/controller_test.go index 70306feee5..764de5a842 100644 --- a/pkg/metrics/controller_test.go +++ b/pkg/metrics/controller_test.go @@ -428,6 +428,44 @@ func TestControllerGameServersTotal(t *testing.T) { }) } +func TestControllerGameServerAllocationsTotal(t *testing.T) { + mu.Lock() + defer mu.Unlock() + resetMetrics() + reader := metricexport.NewReader() + c := newFakeController() + defer c.close() + c.run(t) + + // deleted gs should not be counted + gs := gameServerWithFleetAndState("deleted", agonesv1.GameServerStateCreating) + c.gsWatch.Add(gs) + c.gsWatch.Delete(gs) + + generateGsEvents(15, agonesv1.GameServerStateAllocated, "test", c.gsWatch) + generateGsEvents(19, agonesv1.GameServerStateAllocated, "", c.gsWatch) + + expected := 34 + assert.Eventually(t, func() bool { + list, err := c.gameServerLister.GameServers(gs.ObjectMeta.Namespace).List(labels.Everything()) + if err != nil || list == nil { + return false + } + require.NoError(t, err) + return len(list) == expected + }, 10*time.Second, time.Second) + // While these values are tested above, the following 
test checks will provide a more detailed diff output + // in the case where the assert.Eventually(...) case fails, which makes failing tests easier to debug. + + list, err := c.gameServerLister.GameServers(gs.ObjectMeta.Namespace).List(labels.Everything()) + require.NoError(t, err) + require.Len(t, list, expected) + assertMetricData(t, c, func() {}, reader, gameServersAllocationsTotalName, []expectedMetricData{ + {labels: []string{"test", defaultNs}, val: int64(15)}, + {labels: []string{"none", defaultNs}, val: int64(19)}, + }) +} + func TestControllerFleetOnDeleting(t *testing.T) { mu.Lock() defer mu.Unlock() diff --git a/pkg/metrics/exporter_test.go b/pkg/metrics/exporter_test.go index 461933a5f3..b35879ebc0 100644 --- a/pkg/metrics/exporter_test.go +++ b/pkg/metrics/exporter_test.go @@ -212,6 +212,20 @@ func setupGameServer(t *testing.T, ctrl *fakeController) { return gs.Status.State == agonesv1.GameServerStateCreating }, 5*time.Second, time.Second) ctrl.collect() + + newGs := gs.DeepCopy() + newGs.Status.State = agonesv1.GameServerStateAllocated + ctrl.gsWatch.Modify(newGs) + + require.Eventually(t, func() bool { + gs, err := ctrl.gameServerLister.GameServers(gs.ObjectMeta.Namespace).Get(gs.ObjectMeta.Name) + if gs == nil || err != nil { + return false + } + assert.NoError(t, err) + return gs.Status.State == agonesv1.GameServerStateAllocated + }, 5*time.Second, time.Second) + ctrl.collect() } func setupFleet(_ *testing.T, ctrl *fakeController) { From 959052b0955f7edd8c9eef55f7f1c8879326b307 Mon Sep 17 00:00:00 2001 From: Nicholas Wiersma Date: Mon, 13 Apr 2026 11:39:31 +0200 Subject: [PATCH 2/9] chore: update CRD and install Signed-off-by: Nicholas Wiersma --- install/helm/agones/templates/crds/fleet.yaml | 3 +++ install/yaml/install.yaml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/install/helm/agones/templates/crds/fleet.yaml b/install/helm/agones/templates/crds/fleet.yaml index c81f710fe5..037f7bb76e 100644 --- 
a/install/helm/agones/templates/crds/fleet.yaml +++ b/install/helm/agones/templates/crds/fleet.yaml @@ -157,6 +157,9 @@ spec: allocatedReplicas: type: integer minimum: 0 + allocations: + type: integer + minimum: 0 players: type: object nullable: true diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml index 07ceb7dc50..9d1daedb7d 100644 --- a/install/yaml/install.yaml +++ b/install/yaml/install.yaml @@ -6585,6 +6585,9 @@ spec: allocatedReplicas: type: integer minimum: 0 + allocations: + type: integer + minimum: 0 players: type: object nullable: true From 31fecb2d9157b32e6106ca646799e83e7c755c80 Mon Sep 17 00:00:00 2001 From: Nicholas Wiersma Date: Mon, 13 Apr 2026 11:43:33 +0200 Subject: [PATCH 3/9] feat: add e2e test Signed-off-by: Nicholas Wiersma --- test/e2e/fleet_test.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index 932d5f795d..d1b255f442 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -762,6 +762,26 @@ func TestFleetUpdates(t *testing.T) { } } +func TestFleetCountsAllocations(t *testing.T) { + t.Parallel() + ctx := context.Background() + + client := framework.AgonesClient.AgonesV1() + flt := defaultFleet(framework.Namespace) + flt.Spec.Replicas = 5 + flt, err := client.Fleets(framework.Namespace).Create(ctx, flt, metav1.CreateOptions{}) + require.NoError(t, err) + defer client.Fleets(framework.Namespace).Delete(ctx, flt.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck + + framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(flt.Spec.Replicas)) + _ = framework.CreateAndApplyAllocation(t, flt) + _ = framework.CreateAndApplyAllocation(t, flt) + + framework.AssertFleetCondition(t, flt, func(_ *logrus.Entry, fleet *agonesv1.Fleet) bool { + return fleet.Status.Allocations == 2 + }) +} + func TestUpdateGameServerConfigurationInFleet(t *testing.T) { t.Parallel() ctx := context.Background() From 2a44696261c277d52e90a2cc9e1451aae80306e4 
Mon Sep 17 00:00:00 2001 From: Nicholas Wiersma Date: Tue, 14 Apr 2026 08:30:43 +0200 Subject: [PATCH 4/9] chore: update docs Signed-off-by: Nicholas Wiersma --- .../Reference/agones_crd_api_reference.html | 59 +++++++++++-------- .../en/docs/Reference/fleetautoscaler.md | 2 + 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/site/content/en/docs/Reference/agones_crd_api_reference.html b/site/content/en/docs/Reference/agones_crd_api_reference.html index 4c0c4754db..3221e512a2 100644 --- a/site/content/en/docs/Reference/agones_crd_api_reference.html +++ b/site/content/en/docs/Reference/agones_crd_api_reference.html @@ -3,7 +3,7 @@ description="Detailed list of Agones Custom Resource Definitions available" +++ -{{% feature expiryVersion="1.57.0" %}} +{{% feature expiryVersion="1.58.0" %}}

Packages: