From a7b959d390400fada61baa9e0390d50c99e53030 Mon Sep 17 00:00:00 2001
From: Tom Hromatka
Date: Fri, 31 Jan 2025 13:09:44 -0700
Subject: [PATCH] adaptived: doc: Add example JSON config files

Add several example JSON configuration files.

Signed-off-by: Tom Hromatka
Reviewed-by: George Kennedy
Acked-by: Sidhartha Kumar
---
 adaptived/doc/examples/adjust-weight.json    | 112 ++++++++++++++
 .../doc/examples/determine-memorylow.json    |  56 +++++++++
 adaptived/doc/examples/kill-lowpriority.json |  29 +++++
 .../doc/examples/lowmemfree-sendsignal.json  |  29 +++++
 adaptived/doc/examples/topmem-savelogs.json  |  37 ++++++
 5 files changed, 263 insertions(+)
 create mode 100644 adaptived/doc/examples/adjust-weight.json
 create mode 100644 adaptived/doc/examples/determine-memorylow.json
 create mode 100644 adaptived/doc/examples/kill-lowpriority.json
 create mode 100644 adaptived/doc/examples/lowmemfree-sendsignal.json
 create mode 100644 adaptived/doc/examples/topmem-savelogs.json

diff --git a/adaptived/doc/examples/adjust-weight.json b/adaptived/doc/examples/adjust-weight.json
new file mode 100644
index 0000000..6f9e2db
--- /dev/null
+++ b/adaptived/doc/examples/adjust-weight.json
@@ -0,0 +1,112 @@
+{
+    "rules": [
+        {
+            "name": "1) When there is pressure on the high-priority cgroup, increase its cpu.weight",
+            "description": "The effects in this example are somewhat contrived as cpu.weight should allow other cgroups to consume any extra cpu cycles that the high-priority cgroup does not need. But it's a good example of how adaptived can dynamically adjust settings as the system's workload changes.",
+            "note1": "Note that we rapidly increase the high-priority cgroup's cpu.weight when there is pressure, but we slowly decrease it when pressure abates. We want to err on the side of too much CPU assigned to the high-priority cgroup.",
+            "note2": "We are using the cgroup_setting effect in the first two rules because highpriority.scope has been delegated. If you're modifying a cgroup managed by systemd, you should use the sd_bus_setting effect. (See the last two rules in this file for examples.)",
+            "causes": [
+                {
+                    "name": "pressure",
+                    "args": {
+                        "pressure_file": "/sys/fs/cgroup/database.slice/highpriority.scope/cpu.pressure",
+                        "threshold": 10.0,
+                        "operator": "greaterthan",
+                        "measurement": "full-avg10"
+                    }
+                }
+            ],
+            "effects": [
+                {
+                    "name": "cgroup_setting",
+                    "args": {
+                        "setting": "/sys/fs/cgroup/database.slice/highpriority.scope/cpu.weight",
+                        "value": 100,
+                        "operator": "add",
+                        "limit": 5000
+                    }
+                }
+            ]
+        },
+        {
+            "name": "2) When pressure on the high-priority cgroup has abated, decrease its cpu.weight",
+            "causes": [
+                {
+                    "name": "pressure",
+                    "args": {
+                        "pressure_file": "/sys/fs/cgroup/database.slice/highpriority.scope/cpu.pressure",
+                        "threshold": 0.1,
+                        "duration": 5000,
+                        "operator": "lessthan",
+                        "measurement": "full-avg10"
+                    }
+                }
+            ],
+            "effects": [
+                {
+                    "name": "cgroup_setting",
+                    "args": {
+                        "setting": "/sys/fs/cgroup/database.slice/highpriority.scope/cpu.weight",
+                        "value": 100,
+                        "operator": "subtract",
+                        "limit": 100
+                    }
+                }
+            ]
+        },
+        {
+            "name": "3) When there is pressure on the database slice, increase its cpu.weight",
+            "note": "systemd owns the root cgroup and its immediate children. Therefore we need to request cgroup changes via DBus remote system calls into systemd",
+            "causes": [
+                {
+                    "name": "pressure",
+                    "args": {
+                        "pressure_file": "/sys/fs/cgroup/database.slice/cpu.pressure",
+                        "threshold": 10.0,
+                        "operator": "greaterthan",
+                        "measurement": "full-avg10"
+                    }
+                }
+            ],
+            "effects": [
+                {
+                    "name": "sd_bus_setting",
+                    "args": {
+                        "target": "database.slice",
+                        "setting": "CPUWeight",
+                        "value": 100,
+                        "operator": "add",
+                        "limit": 5000
+                    }
+                }
+            ]
+        },
+        {
+            "name": "4) When pressure on the database slice has abated, decrease its cpu.weight",
+            "causes": [
+                {
+                    "name": "pressure",
+                    "args": {
+                        "pressure_file": "/sys/fs/cgroup/database.slice/cpu.pressure",
+                        "threshold": 0.1,
+                        "duration": 5000,
+                        "operator": "lessthan",
+                        "measurement": "full-avg10"
+                    }
+                }
+            ],
+            "effects": [
+                {
+                    "name": "sd_bus_setting",
+                    "args": {
+                        "target": "database.slice",
+                        "setting": "CPUWeight",
+                        "value": 100,
+                        "operator": "subtract",
+                        "limit": 100
+                    }
+                }
+            ]
+        }
+    ]
+}
diff --git a/adaptived/doc/examples/determine-memorylow.json b/adaptived/doc/examples/determine-memorylow.json
new file mode 100644
index 0000000..43565d6
--- /dev/null
+++ b/adaptived/doc/examples/determine-memorylow.json
@@ -0,0 +1,56 @@
+{
+    "rules": [
+        {
+            "name": "When there is memory pressure, increase the cgroup's memory.high",
+            "description": "These rules can be used to determine an application's minimum required memory. In other words, these two rules will adjust memory.high to exert memory pressure on the application, and memory.high will ultimately settle at approximately the level where the application starts to feel memory pressure. The memory.high value computed by this config file can then be used to populate the cgroup's memory.low to ensure that the application gets the minimum memory it needs before it experiences memory pressure.",
+            "causes": [
+                {
+                    "name": "pressure",
+                    "args": {
+                        "pressure_file": "/sys/fs/cgroup/database.slice/db1.scope/memory.pressure",
+                        "threshold": 8.0,
+                        "duration": 20000,
+                        "operator": "greaterthan",
+                        "measurement": "some-avg60"
+                    }
+                }
+            ],
+            "effects": [
+                {
+                    "name": "cgroup_memory_setting",
+                    "args": {
+                        "setting": "/sys/fs/cgroup/database.slice/db1.scope/memory.high",
+                        "value": "100M",
+                        "operator": "add"
+                    }
+                }
+            ]
+        },
+        {
+            "name": "When memory pressure has abated, decrease the cgroup's memory.high",
+            "causes": [
+                {
+                    "name": "pressure",
+                    "args": {
+                        "pressure_file": "/sys/fs/cgroup/database.slice/db1.scope/memory.pressure",
+                        "threshold": 2.0,
+                        "duration": 20000,
+                        "operator": "lessthan",
+                        "measurement": "some-avg60"
+                    }
+                }
+            ],
+            "effects": [
+                {
+                    "name": "cgroup_memory_setting",
+                    "args": {
+                        "setting": "/sys/fs/cgroup/database.slice/db1.scope/memory.high",
+                        "value": "100M",
+                        "operator": "subtract",
+                        "limit": "100M"
+                    }
+                }
+            ]
+        }
+    ]
+}
diff --git a/adaptived/doc/examples/kill-lowpriority.json b/adaptived/doc/examples/kill-lowpriority.json
new file mode 100644
index 0000000..3c622c9
--- /dev/null
+++ b/adaptived/doc/examples/kill-lowpriority.json
@@ -0,0 +1,29 @@
+{
+    "rules": [
+        {
+            "name": "When there is pressure on the high-priority cgroup, kill processes in the low-priority cgroup",
+            "description": "This configuration was motivated by Meta's oomd daemon. By placing important processes in a cgroup, say highpriority.scope, adaptived can ensure that these processes get the utmost priority when there is additional load on the machine. In this example, adaptived kills processes in the low-priority cgroup until the pressure abates. Note that unlike oomd, adaptived can operate on memory, cpu, or io PSI pressure (or some other measurement entirely) and not just memory",
+            "causes": [
+                {
+                    "name": "pressure",
+                    "args": {
+                        "pressure_file": "/sys/fs/cgroup/database.slice/highpriority.scope/cpu.pressure",
+                        "threshold": 10.0,
+                        "operator": "greaterthan",
+                        "measurement": "full-avg10",
+                        "duration": 2000
+                    }
+                }
+            ],
+            "effects": [
+                {
+                    "name": "kill_cgroup",
+                    "args": {
+                        "cgroup": "/sys/fs/cgroup/database.slice/lowpriority.scope",
+                        "count": 1
+                    }
+                }
+            ]
+        }
+    ]
+}
diff --git a/adaptived/doc/examples/lowmemfree-sendsignal.json b/adaptived/doc/examples/lowmemfree-sendsignal.json
new file mode 100644
index 0000000..7ad739c
--- /dev/null
+++ b/adaptived/doc/examples/lowmemfree-sendsignal.json
@@ -0,0 +1,29 @@
+{
+    "rules": [
+        {
+            "name": "When MemFree falls below 2 GB, send SIGALRM (signal 14) to the monitoring program",
+            "description": "This configuration file will monitor MemFree in /proc/meminfo, and when it falls below 2 GB, it will send a signal to the monitoring program. The monitoring program could then disable low-priority processes, migrate some processes to another machine, or, if this is a VM, add more memory to the VM.",
+            "causes": [
+                {
+                    "name": "meminfo",
+                    "args": {
+                        "field": "MemFree",
+                        "threshold": "2G",
+                        "operator": "lessthan"
+                    }
+                }
+            ],
+            "effects": [
+                {
+                    "name": "signal",
+                    "args": {
+                        "proc_names": [
+                            { "name": "monitor_program" }
+                        ],
+                        "signal": 14
+                    }
+                }
+            ]
+        }
+    ]
+}
diff --git a/adaptived/doc/examples/topmem-savelogs.json b/adaptived/doc/examples/topmem-savelogs.json
new file mode 100644
index 0000000..8da6f65
--- /dev/null
+++ b/adaptived/doc/examples/topmem-savelogs.json
@@ -0,0 +1,37 @@
+{
+    "rules": [
+        {
+            "name": "When mem used (according to top) exceeds 4G, save off critical memory logs",
+            "description": "When memory usage exceeds a certain threshold, this could be a sign of a memory leak or another problem. Save off critical log files for later analysis. This could be useful on production machines where more intrusive debugging mechanisms aren't available.",
+            "causes": [
+                {
+                    "name": "top",
+                    "args": {
+                        "component": "mem",
+                        "field": "used",
+                        "threshold": "4G",
+                        "operator": "greaterthan"
+                    }
+                }
+            ],
+            "effects": [
+                {
+                    "name": "logger",
+                    "args": {
+                        "logfile": "anomaly.log",
+                        "date_format": "%Y-%m-%d-%H:%M:%S-%Z",
+                        "utc": true,
+                        "files": [
+                            {
+                                "file": "/proc/meminfo"
+                            },
+                            {
+                                "file": "/proc/slabinfo"
+                            }
+                        ]
+                    }
+                }
+            ]
+        }
+    ]
+}
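
All of the example configs in this patch share the same basic layout: a top-level "rules" array in which each rule pairs one or more "causes" (pressure, meminfo, top) with one or more "effects" (cgroup_setting, sd_bus_setting, cgroup_memory_setting, kill_cgroup, signal, logger). As a rough sketch only, a minimal rule built from the pressure cause and cgroup_setting effect used in adjust-weight.json looks like the following; the cgroup path, threshold, and limit values are placeholders to adapt to your own hierarchy, not recommended settings:

{
    "rules": [
        {
            "name": "Describe what this rule does",
            "causes": [
                {
                    "name": "pressure",
                    "args": {
                        "pressure_file": "/sys/fs/cgroup/<your-cgroup>/cpu.pressure",
                        "threshold": 10.0,
                        "operator": "greaterthan",
                        "measurement": "full-avg10"
                    }
                }
            ],
            "effects": [
                {
                    "name": "cgroup_setting",
                    "args": {
                        "setting": "/sys/fs/cgroup/<your-cgroup>/cpu.weight",
                        "value": 100,
                        "operator": "add",
                        "limit": 5000
                    }
                }
            ]
        }
    ]
}

adjust-weight.json above shows how several such rules can be combined in a single config file to ratchet a setting up under load and back down once pressure abates.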